1 /* $OpenBSD: pmap.c,v 1.111 2023/04/13 15:23:22 miod Exp $ */ 2 /* $NetBSD: pmap.c,v 1.107 2001/08/31 16:47:41 eeh Exp $ */ 3 /* 4 * 5 * Copyright (C) 1996-1999 Eduardo Horvath. 6 * All rights reserved. 7 * 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 */ 28 29 #include <sys/atomic.h> 30 #include <sys/param.h> 31 #include <sys/malloc.h> 32 #include <sys/queue.h> 33 #include <sys/systm.h> 34 #include <sys/proc.h> 35 #include <sys/msgbuf.h> 36 #include <sys/pool.h> 37 #include <sys/exec.h> 38 #include <sys/core.h> 39 #include <sys/kcore.h> 40 41 #include <uvm/uvm.h> 42 43 #include <machine/pcb.h> 44 #include <machine/sparc64.h> 45 #include <machine/ctlreg.h> 46 #include <machine/hypervisor.h> 47 #include <machine/openfirm.h> 48 #include <machine/kcore.h> 49 50 #include "cache.h" 51 52 #ifdef DDB 53 #include <machine/db_machdep.h> 54 #include <ddb/db_command.h> 55 #include <ddb/db_sym.h> 56 #include <ddb/db_variables.h> 57 #include <ddb/db_extern.h> 58 #include <ddb/db_access.h> 59 #include <ddb/db_output.h> 60 #define db_enter() __asm volatile("ta 1; nop"); 61 #else 62 #define db_enter() 63 #define db_printf printf 64 #endif 65 66 #define MEG (1<<20) /* 1MB */ 67 #define KB (1<<10) /* 1KB */ 68 69 paddr_t cpu0paddr;/* XXXXXXXXXXXXXXXX */ 70 71 /* These routines are in assembly to allow access thru physical mappings */ 72 extern int64_t pseg_get(struct pmap*, vaddr_t addr); 73 extern int pseg_set(struct pmap*, vaddr_t addr, int64_t tte, paddr_t spare); 74 75 extern void pmap_zero_phys(paddr_t pa); 76 extern void pmap_copy_phys(paddr_t src, paddr_t dst); 77 78 /* 79 * Diatribe on ref/mod counting: 80 * 81 * First of all, ref/mod info must be non-volatile. Hence we need to keep it 82 * in the pv_entry structure for each page. (We could bypass this for the 83 * vm_page, but that's a long story....) 84 * 85 * This architecture has nice, fast traps with lots of space for software bits 86 * in the TTE. To accelerate ref/mod counts we make use of these features. 87 * 88 * When we map a page initially, we place a TTE in the page table. It's 89 * inserted with the TLB_W and TLB_ACCESS bits cleared. If a page is really 90 * writeable we set the TLB_REAL_W bit for the trap handler. 91 * 92 * Whenever we take a TLB miss trap, the trap handler will set the TLB_ACCESS 93 * bit in the appropriate TTE in the page table. Whenever we take a protection 94 * fault, if the TLB_REAL_W bit is set then we flip both the TLB_W and TLB_MOD 95 * bits to enable writing and mark the page as modified. 
96 * 97 * This means that we may have ref/mod information all over the place. The 98 * pmap routines must traverse the page tables of all pmaps with a given page 99 * and collect/clear all the ref/mod information and copy it into the pv_entry. 100 */ 101 102 #define PV_ALIAS 0x1LL 103 #define PV_REF 0x2LL 104 #define PV_MOD 0x4LL 105 #define PV_MASK (0x03fLL) 106 #define PV_VAMASK (~(NBPG - 1)) 107 #define PV_MATCH(pv,va) (!((((pv)->pv_va) ^ (va)) & PV_VAMASK)) 108 #define PV_SETVA(pv,va) ((pv)->pv_va = (((va) & PV_VAMASK) | (((pv)->pv_va) & PV_MASK))) 109 110 static struct pool pv_pool; 111 static struct pool pmap_pool; 112 113 pv_entry_t pmap_remove_pv(struct pmap *pm, vaddr_t va, paddr_t pa); 114 pv_entry_t pmap_enter_pv(struct pmap *pm, pv_entry_t, vaddr_t va, paddr_t pa); 115 void pmap_page_cache(struct pmap *pm, paddr_t pa, int mode); 116 117 void pmap_bootstrap_cpu(paddr_t); 118 119 void pmap_pinit(struct pmap *); 120 void pmap_release(struct pmap *); 121 pv_entry_t pa_to_pvh(paddr_t); 122 123 pv_entry_t 124 pa_to_pvh(paddr_t pa) 125 { 126 struct vm_page *pg; 127 128 pg = PHYS_TO_VM_PAGE(pa); 129 return pg ? &pg->mdpage.pvent : NULL; 130 } 131 132 static __inline u_int 133 pmap_tte2flags(u_int64_t tte) 134 { 135 if (CPU_ISSUN4V) 136 return (((tte & SUN4V_TLB_ACCESS) ? PV_REF : 0) | 137 ((tte & SUN4V_TLB_MODIFY) ? PV_MOD : 0)); 138 else 139 return (((tte & SUN4U_TLB_ACCESS) ? PV_REF : 0) | 140 ((tte & SUN4U_TLB_MODIFY) ? PV_MOD : 0)); 141 } 142 143 /* 144 * Here's the CPU TSB stuff. It's allocated in pmap_bootstrap. 145 */ 146 pte_t *tsb_dmmu; 147 pte_t *tsb_immu; 148 int tsbsize; /* tsbents = 512 * 2^tsbsize */ 149 #define TSBENTS (512 << tsbsize) 150 #define TSBSIZE (TSBENTS * 16) 151 152 /* 153 * The invalid tsb tag uses the fact that the last context we have is 154 * never allocated. 155 */ 156 #define TSB_TAG_INVALID (~0LL << 48) 157 158 #define TSB_DATA(g,sz,pa,priv,write,cache,aliased,valid,ie) \ 159 (CPU_ISSUN4V ?\ 160 SUN4V_TSB_DATA(g,sz,pa,priv,write,cache,aliased,valid,ie) : \ 161 SUN4U_TSB_DATA(g,sz,pa,priv,write,cache,aliased,valid,ie)) 162 163 /* The same for sun4u and sun4v. */ 164 #define TLB_V SUN4U_TLB_V 165 166 /* Only used for DEBUG. */ 167 #define TLB_NFO (CPU_ISSUN4V ? SUN4V_TLB_NFO : SUN4U_TLB_NFO) 168 169 /* 170 * UltraSPARC T1 & T2 implement only a 40-bit real address range, just 171 * like older UltraSPARC CPUs. 172 */ 173 #define TLB_PA_MASK SUN4U_TLB_PA_MASK 174 175 /* XXX */ 176 #define TLB_TSB_LOCK (CPU_ISSUN4V ? SUN4V_TLB_TSB_LOCK : SUN4U_TLB_TSB_LOCK) 177 178 #ifdef SUN4V 179 struct tsb_desc *tsb_desc; 180 #endif 181 182 struct pmap kernel_pmap_; 183 184 /* 185 * Virtual and physical addresses of the start and end of kernel text 186 * and data segments. 
187 */ 188 vaddr_t ktext; 189 paddr_t ktextp; 190 vaddr_t ektext; 191 paddr_t ektextp; 192 vaddr_t kdata; 193 paddr_t kdatap; 194 vaddr_t ekdata; 195 paddr_t ekdatap; 196 197 static struct mem_region memlist[8]; /* Pick a random size here */ 198 199 vaddr_t vmmap; /* one reserved MI vpage for /dev/mem */ 200 201 struct mem_region *mem, *avail, *orig; 202 int memsize; 203 204 static int memh = 0, vmemh = 0; /* Handles to OBP devices */ 205 206 static int ptelookup_va(vaddr_t va); /* sun4u */ 207 208 static __inline void 209 tsb_invalidate(int ctx, vaddr_t va) 210 { 211 int i; 212 int64_t tag; 213 214 i = ptelookup_va(va); 215 tag = TSB_TAG(0, ctx, va); 216 if (tsb_dmmu[i].tag == tag) 217 atomic_cas_ulong((volatile unsigned long *)&tsb_dmmu[i].tag, 218 tag, TSB_TAG_INVALID); 219 if (tsb_immu[i].tag == tag) 220 atomic_cas_ulong((volatile unsigned long *)&tsb_immu[i].tag, 221 tag, TSB_TAG_INVALID); 222 } 223 224 struct prom_map *prom_map; 225 int prom_map_size; 226 227 #ifdef DEBUG 228 #define PDB_BOOT 0x20000 229 #define PDB_BOOT1 0x40000 230 int pmapdebug = 0; 231 232 #define BDPRINTF(n, f) if (pmapdebug & (n)) prom_printf f 233 #else 234 #define BDPRINTF(n, f) 235 #endif 236 237 /* 238 * 239 * A context is simply a small number that differentiates multiple mappings 240 * of the same address. Contexts on the spitfire are 13 bits, but could 241 * be as large as 17 bits. 242 * 243 * Each context is either free or attached to a pmap. 244 * 245 * The context table is an array of pointers to psegs. Just dereference 246 * the right pointer and you get to the pmap segment tables. These are 247 * physical addresses, of course. 248 * 249 */ 250 paddr_t *ctxbusy; 251 int numctx; 252 #define CTXENTRY (sizeof(paddr_t)) 253 #define CTXSIZE (numctx * CTXENTRY) 254 255 int pmap_get_page(paddr_t *, const char *, struct pmap *); 256 void pmap_free_page(paddr_t, struct pmap *); 257 258 /* 259 * Support for big page sizes. This maps the page size to the 260 * page bits. That is: these are the bits between 8K pages and 261 * larger page sizes that cause aliasing. 262 */ 263 const struct page_size_map page_size_map[] = { 264 { (4*1024*1024-1) & ~(8*1024-1), PGSZ_4M }, 265 { (512*1024-1) & ~(8*1024-1), PGSZ_512K }, 266 { (64*1024-1) & ~(8*1024-1), PGSZ_64K }, 267 { (8*1024-1) & ~(8*1024-1), PGSZ_8K }, 268 { 0, 0 } 269 }; 270 271 /* 272 * Enter a TTE into the kernel pmap only. Don't do anything else. 273 * 274 * Use only during bootstrapping since it does no locking and 275 * can lose ref/mod info!!!! 276 * 277 */ 278 static void 279 pmap_enter_kpage(vaddr_t va, int64_t data) 280 { 281 paddr_t newp; 282 283 newp = 0; 284 while (pseg_set(pmap_kernel(), va, data, newp) == 1) { 285 newp = 0; 286 if (!pmap_get_page(&newp, NULL, pmap_kernel())) { 287 prom_printf("pmap_enter_kpage: out of pages\n"); 288 panic("pmap_enter_kpage"); 289 } 290 291 BDPRINTF(PDB_BOOT1, 292 ("pseg_set: pm=%p va=%p data=%lx newp %lx\r\n", 293 pmap_kernel(), va, (long)data, (long)newp)); 294 } 295 } 296 297 /* 298 * Check bootargs to see if we need to enable bootdebug. 
299 */ 300 #ifdef DEBUG 301 void 302 pmap_bootdebug(void) 303 { 304 int chosen; 305 char *cp; 306 char buf[128]; 307 308 /* 309 * Grab boot args from PROM 310 */ 311 chosen = OF_finddevice("/chosen"); 312 /* Setup pointer to boot flags */ 313 OF_getprop(chosen, "bootargs", buf, sizeof(buf)); 314 cp = buf; 315 while (*cp != '-') 316 if (*cp++ == '\0') 317 return; 318 for (;;) 319 switch (*++cp) { 320 case '\0': 321 return; 322 case 'V': 323 pmapdebug |= PDB_BOOT|PDB_BOOT1; 324 break; 325 case 'D': 326 pmapdebug |= PDB_BOOT1; 327 break; 328 } 329 } 330 #endif 331 332 /* 333 * This is called during bootstrap, before the system is really initialized. 334 * 335 * It's called with the start and end virtual addresses of the kernel. We 336 * bootstrap the pmap allocator now. We will allocate the basic structures we 337 * need to bootstrap the VM system here: the page frame tables, the TSB, and 338 * the free memory lists. 339 * 340 * Now all this is becoming a bit obsolete. maxctx is still important, but by 341 * separating the kernel text and data segments we really would need to 342 * provide the start and end of each segment. But we can't. The rodata 343 * segment is attached to the end of the kernel segment and has nothing to 344 * delimit its end. We could still pass in the beginning of the kernel and 345 * the beginning and end of the data segment but we could also just as easily 346 * calculate that all in here. 347 * 348 * To handle the kernel text, we need to do a reverse mapping of the start of 349 * the kernel, then traverse the free memory lists to find out how big it is. 350 */ 351 352 void 353 pmap_bootstrap(u_long kernelstart, u_long kernelend, u_int maxctx, u_int numcpus) 354 { 355 extern int data_start[], end[]; /* start of data segment */ 356 extern int msgbufmapped; 357 struct mem_region *mp, *mp1; 358 int msgbufsiz; 359 int pcnt; 360 size_t s, sz; 361 int i, j; 362 int64_t data; 363 vaddr_t va; 364 u_int64_t phys_msgbuf; 365 paddr_t newkp; 366 vaddr_t newkv, firstaddr, intstk; 367 vsize_t kdsize, ktsize; 368 369 #ifdef DEBUG 370 pmap_bootdebug(); 371 #endif 372 373 BDPRINTF(PDB_BOOT, ("Entered pmap_bootstrap.\r\n")); 374 /* 375 * set machine page size 376 */ 377 uvmexp.pagesize = NBPG; 378 uvm_setpagesize(); 379 380 /* 381 * Find out how big the kernel's virtual address 382 * space is. The *$#@$ prom loses this info 383 */ 384 if ((vmemh = OF_finddevice("/virtual-memory")) == -1) { 385 prom_printf("no virtual-memory?"); 386 OF_exit(); 387 } 388 bzero((caddr_t)memlist, sizeof(memlist)); 389 if (OF_getprop(vmemh, "available", memlist, sizeof(memlist)) <= 0) { 390 prom_printf("no vmemory avail?"); 391 OF_exit(); 392 } 393 394 #ifdef DEBUG 395 if (pmapdebug & PDB_BOOT) { 396 /* print out mem list */ 397 prom_printf("Available virtual memory:\r\n"); 398 for (mp = memlist; mp->size; mp++) { 399 prom_printf("memlist start %p size %lx\r\n", 400 (void *)(u_long)mp->start, 401 (u_long)mp->size); 402 } 403 prom_printf("End of available virtual memory\r\n"); 404 } 405 #endif 406 /* 407 * Get hold or the message buffer. 
408 */ 409 msgbufp = (struct msgbuf *)(vaddr_t)MSGBUF_VA; 410 /* XXXXX -- increase msgbufsiz for uvmhist printing */ 411 msgbufsiz = 4*NBPG /* round_page(sizeof(struct msgbuf)) */; 412 BDPRINTF(PDB_BOOT, ("Trying to allocate msgbuf at %lx, size %lx\r\n", 413 (long)msgbufp, (long)msgbufsiz)); 414 if ((long)msgbufp != 415 (long)(phys_msgbuf = prom_claim_virt((vaddr_t)msgbufp, msgbufsiz))) 416 prom_printf( 417 "cannot get msgbuf VA, msgbufp=%p, phys_msgbuf=%lx\r\n", 418 (void *)msgbufp, (long)phys_msgbuf); 419 phys_msgbuf = prom_get_msgbuf(msgbufsiz, MMU_PAGE_ALIGN); 420 BDPRINTF(PDB_BOOT, 421 ("We should have the memory at %lx, let's map it in\r\n", 422 phys_msgbuf)); 423 if (prom_map_phys(phys_msgbuf, msgbufsiz, (vaddr_t)msgbufp, 424 -1/* sunos does this */) == -1) 425 prom_printf("Failed to map msgbuf\r\n"); 426 else 427 BDPRINTF(PDB_BOOT, ("msgbuf mapped at %p\r\n", 428 (void *)msgbufp)); 429 msgbufmapped = 1; /* enable message buffer */ 430 initmsgbuf((caddr_t)msgbufp, msgbufsiz); 431 432 /* 433 * Record kernel mapping -- we will map these with a permanent 4MB 434 * TLB entry when we initialize the CPU later. 435 */ 436 BDPRINTF(PDB_BOOT, ("translating kernelstart %p\r\n", 437 (void *)kernelstart)); 438 ktext = kernelstart; 439 ktextp = prom_vtop(kernelstart); 440 441 kdata = (vaddr_t)data_start; 442 kdatap = prom_vtop(kdata); 443 ekdata = (vaddr_t)end; 444 445 /* 446 * Find the real size of the kernel. Locate the smallest starting 447 * address > kernelstart. 448 */ 449 for (mp1 = mp = memlist; mp->size; mp++) { 450 /* 451 * Check whether this region is at the end of the kernel. 452 */ 453 if (mp->start >= ekdata && (mp1->start < ekdata || 454 mp1->start > mp->start)) 455 mp1 = mp; 456 } 457 if (mp1->start < kdata) 458 prom_printf("Kernel at end of vmem???\r\n"); 459 460 BDPRINTF(PDB_BOOT1, 461 ("Kernel data is mapped at %lx, next free seg: %lx, %lx\r\n", 462 (long)kdata, (u_long)mp1->start, (u_long)mp1->size)); 463 464 /* 465 * We save where we can start allocating memory. 466 */ 467 firstaddr = (ekdata + 07) & ~ 07; /* Longword align */ 468 469 /* 470 * We reserve 100K to grow. 471 */ 472 ekdata += 100*KB; 473 474 /* 475 * And set the end of the data segment to the end of what our 476 * bootloader allocated for us, if we still fit in there. 477 */ 478 if (ekdata < mp1->start) 479 ekdata = mp1->start; 480 481 #define valloc(name, type, num) (name) = (type *)firstaddr; firstaddr += (num) 482 483 /* 484 * Since we can't always give the loader the hint to align us on a 4MB 485 * boundary, we will need to do the alignment ourselves. First 486 * allocate a new 4MB aligned segment for the kernel, then map it 487 * in, copy the kernel over, swap mappings, then finally, free the 488 * old kernel. Then we can continue with this. 489 * 490 * We'll do the data segment up here since we know how big it is. 491 * We'll do the text segment after we've read in the PROM translations 492 * so we can figure out its size. 493 * 494 * The ctxbusy table takes about 64KB, the TSB up to 32KB, and the 495 * rest should be less than 1K, so 100KB extra should be plenty. 496 */ 497 kdsize = round_page(ekdata - kdata); 498 BDPRINTF(PDB_BOOT1, ("Kernel data size is %lx\r\n", (long)kdsize)); 499 500 if ((kdatap & (4*MEG-1)) == 0) { 501 /* We were at a 4MB boundary -- claim the rest */ 502 psize_t szdiff = (4*MEG - kdsize) & (4*MEG - 1); 503 504 BDPRINTF(PDB_BOOT1, ("Need to extend dseg by %lx\r\n", 505 (long)szdiff)); 506 if (szdiff) { 507 /* Claim the rest of the physical page. 
*/ 508 newkp = kdatap + kdsize; 509 newkv = kdata + kdsize; 510 if (newkp != prom_claim_phys(newkp, szdiff)) { 511 prom_printf("pmap_bootstrap: could not claim " 512 "physical dseg extension " 513 "at %lx size %lx\r\n", 514 newkp, szdiff); 515 goto remap_data; 516 } 517 518 /* And the rest of the virtual page. */ 519 if (prom_claim_virt(newkv, szdiff) != newkv) 520 prom_printf("pmap_bootstrap: could not claim " 521 "virtual dseg extension " 522 "at size %lx\r\n", newkv, szdiff); 523 524 /* Make sure all 4MB are mapped */ 525 prom_map_phys(newkp, szdiff, newkv, -1); 526 } 527 } else { 528 psize_t sz; 529 remap_data: 530 /* 531 * Either we're not at a 4MB boundary or we can't get the rest 532 * of the 4MB extension. We need to move the data segment. 533 * Leave 1MB of extra fiddle space in the calculations. 534 */ 535 536 sz = (kdsize + 4*MEG - 1) & ~(4*MEG-1); 537 BDPRINTF(PDB_BOOT1, 538 ("Allocating new %lx kernel data at 4MB boundary\r\n", 539 (u_long)sz)); 540 if ((newkp = prom_alloc_phys(sz, 4*MEG)) == (paddr_t)-1 ) { 541 prom_printf("Cannot allocate new kernel\r\n"); 542 OF_exit(); 543 } 544 BDPRINTF(PDB_BOOT1, ("Allocating new va for buffer at %llx\r\n", 545 (u_int64_t)newkp)); 546 if ((newkv = (vaddr_t)prom_alloc_virt(sz, 8)) == 547 (vaddr_t)-1) { 548 prom_printf("Cannot allocate new kernel va\r\n"); 549 OF_exit(); 550 } 551 BDPRINTF(PDB_BOOT1, ("Mapping in buffer %llx at %llx\r\n", 552 (u_int64_t)newkp, (u_int64_t)newkv)); 553 prom_map_phys(newkp, sz, (vaddr_t)newkv, -1); 554 BDPRINTF(PDB_BOOT1, ("Copying %ld bytes kernel data...", 555 kdsize)); 556 bzero((void *)newkv, sz); 557 bcopy((void *)kdata, (void *)newkv, kdsize); 558 BDPRINTF(PDB_BOOT1, ("done. Swapping maps..unmap new\r\n")); 559 prom_unmap_virt((vaddr_t)newkv, sz); 560 BDPRINTF(PDB_BOOT, ("remap old ")); 561 #if 0 562 /* 563 * calling the prom will probably require reading part of the 564 * data segment so we can't do this. */ 565 prom_unmap_virt((vaddr_t)kdatap, kdsize); 566 #endif 567 prom_map_phys(newkp, sz, kdata, -1); 568 /* 569 * we will map in 4MB, more than we allocated, to allow 570 * further allocation 571 */ 572 BDPRINTF(PDB_BOOT1, ("free old\r\n")); 573 prom_free_phys(kdatap, kdsize); 574 kdatap = newkp; 575 BDPRINTF(PDB_BOOT1, 576 ("pmap_bootstrap: firstaddr is %lx virt (%lx phys)" 577 "avail for kernel\r\n", (u_long)firstaddr, 578 (u_long)prom_vtop(firstaddr))); 579 } 580 581 /* 582 * Find out how much RAM we have installed. 583 */ 584 BDPRINTF(PDB_BOOT, ("pmap_bootstrap: getting phys installed\r\n")); 585 if ((memh = OF_finddevice("/memory")) == -1) { 586 prom_printf("no memory?"); 587 OF_exit(); 588 } 589 memsize = OF_getproplen(memh, "reg") + 2 * sizeof(struct mem_region); 590 valloc(mem, struct mem_region, memsize); 591 bzero((caddr_t)mem, memsize); 592 if (OF_getprop(memh, "reg", mem, memsize) <= 0) { 593 prom_printf("no memory installed?"); 594 OF_exit(); 595 } 596 597 #ifdef DEBUG 598 if (pmapdebug & PDB_BOOT1) { 599 /* print out mem list */ 600 prom_printf("Installed physical memory:\r\n"); 601 for (mp = mem; mp->size; mp++) { 602 prom_printf("memlist start %lx size %lx\r\n", 603 (u_long)mp->start, (u_long)mp->size); 604 } 605 } 606 #endif 607 BDPRINTF(PDB_BOOT1, ("Calculating physmem:")); 608 609 for (mp = mem; mp->size; mp++) 610 physmem += atop(mp->size); 611 BDPRINTF(PDB_BOOT1, (" result %x or %d pages\r\n", 612 (int)physmem, (int)physmem)); 613 614 /* 615 * Calculate approx TSB size. 
616 */ 617 tsbsize = 0; 618 #ifdef SMALL_KERNEL 619 while ((physmem >> tsbsize) > atop(64 * MEG) && tsbsize < 2) 620 #else 621 while ((physmem >> tsbsize) > atop(64 * MEG) && tsbsize < 7) 622 #endif 623 tsbsize++; 624 625 /* 626 * Save the prom translations 627 */ 628 sz = OF_getproplen(vmemh, "translations"); 629 valloc(prom_map, struct prom_map, sz); 630 if (OF_getprop(vmemh, "translations", (void *)prom_map, sz) <= 0) { 631 prom_printf("no translations installed?"); 632 OF_exit(); 633 } 634 prom_map_size = sz / sizeof(struct prom_map); 635 #ifdef DEBUG 636 if (pmapdebug & PDB_BOOT) { 637 /* print out mem list */ 638 prom_printf("Prom xlations:\r\n"); 639 for (i = 0; i < prom_map_size; i++) { 640 prom_printf("start %016lx size %016lx tte %016lx\r\n", 641 (u_long)prom_map[i].vstart, 642 (u_long)prom_map[i].vsize, 643 (u_long)prom_map[i].tte); 644 } 645 prom_printf("End of prom xlations\r\n"); 646 } 647 #endif 648 /* 649 * Hunt for the kernel text segment and figure out it size and 650 * alignment. 651 */ 652 ktsize = 0; 653 for (i = 0; i < prom_map_size; i++) 654 if (prom_map[i].vstart == ktext + ktsize) 655 ktsize += prom_map[i].vsize; 656 if (ktsize == 0) 657 panic("No kernel text segment!"); 658 ektext = ktext + ktsize; 659 660 if (ktextp & (4*MEG-1)) { 661 /* Kernel text is not 4MB aligned -- need to fix that */ 662 BDPRINTF(PDB_BOOT1, 663 ("Allocating new %lx kernel text at 4MB boundary\r\n", 664 (u_long)ktsize)); 665 if ((newkp = prom_alloc_phys(ktsize, 4*MEG)) == 0 ) { 666 prom_printf("Cannot allocate new kernel text\r\n"); 667 OF_exit(); 668 } 669 BDPRINTF(PDB_BOOT1, ("Allocating new va for buffer at %llx\r\n", 670 (u_int64_t)newkp)); 671 if ((newkv = (vaddr_t)prom_alloc_virt(ktsize, 8)) == 672 (vaddr_t)-1) { 673 prom_printf("Cannot allocate new kernel text va\r\n"); 674 OF_exit(); 675 } 676 BDPRINTF(PDB_BOOT1, ("Mapping in buffer %lx at %lx\r\n", 677 (u_long)newkp, (u_long)newkv)); 678 prom_map_phys(newkp, ktsize, (vaddr_t)newkv, -1); 679 BDPRINTF(PDB_BOOT1, ("Copying %ld bytes kernel text...", 680 ktsize)); 681 bcopy((void *)ktext, (void *)newkv, 682 ktsize); 683 BDPRINTF(PDB_BOOT1, ("done. Swapping maps..unmap new\r\n")); 684 prom_unmap_virt((vaddr_t)newkv, 4*MEG); 685 BDPRINTF(PDB_BOOT, ("remap old ")); 686 #if 0 687 /* 688 * calling the prom will probably require reading part of the 689 * text segment so we can't do this. 690 */ 691 prom_unmap_virt((vaddr_t)ktextp, ktsize); 692 #endif 693 prom_map_phys(newkp, ktsize, ktext, -1); 694 /* 695 * we will map in 4MB, more than we allocated, to allow 696 * further allocation 697 */ 698 BDPRINTF(PDB_BOOT1, ("free old\r\n")); 699 prom_free_phys(ktextp, ktsize); 700 ktextp = newkp; 701 702 BDPRINTF(PDB_BOOT1, 703 ("pmap_bootstrap: firstaddr is %lx virt (%lx phys)" 704 "avail for kernel\r\n", (u_long)firstaddr, 705 (u_long)prom_vtop(firstaddr))); 706 707 /* 708 * Re-fetch translations -- they've certainly changed. 709 */ 710 if (OF_getprop(vmemh, "translations", (void *)prom_map, sz) <= 711 0) { 712 prom_printf("no translations installed?"); 713 OF_exit(); 714 } 715 #ifdef DEBUG 716 if (pmapdebug & PDB_BOOT) { 717 /* print out mem list */ 718 prom_printf("New prom xlations:\r\n"); 719 for (i = 0; i < prom_map_size; i++) { 720 prom_printf("start %016lx size %016lx tte %016lx\r\n", 721 (u_long)prom_map[i].vstart, 722 (u_long)prom_map[i].vsize, 723 (u_long)prom_map[i].tte); 724 } 725 prom_printf("End of prom xlations\r\n"); 726 } 727 #endif 728 } 729 ektextp = ktextp + ktsize; 730 731 /* 732 * Here's a quick in-lined reverse bubble sort. 
It gets rid of 733 * any translations inside the kernel data VA range. 734 */ 735 for(i = 0; i < prom_map_size; i++) { 736 if (prom_map[i].vstart >= kdata && 737 prom_map[i].vstart <= firstaddr) { 738 prom_map[i].vstart = 0; 739 prom_map[i].vsize = 0; 740 } 741 if (prom_map[i].vstart >= ktext && 742 prom_map[i].vstart <= ektext) { 743 prom_map[i].vstart = 0; 744 prom_map[i].vsize = 0; 745 } 746 for(j = i; j < prom_map_size; j++) { 747 if (prom_map[j].vstart >= kdata && 748 prom_map[j].vstart <= firstaddr) 749 continue; /* this is inside the kernel */ 750 if (prom_map[j].vstart >= ktext && 751 prom_map[j].vstart <= ektext) 752 continue; /* this is inside the kernel */ 753 if (prom_map[j].vstart > prom_map[i].vstart) { 754 struct prom_map tmp; 755 tmp = prom_map[i]; 756 prom_map[i] = prom_map[j]; 757 prom_map[j] = tmp; 758 } 759 } 760 } 761 #ifdef DEBUG 762 if (pmapdebug & PDB_BOOT) { 763 /* print out mem list */ 764 prom_printf("Prom xlations:\r\n"); 765 for (i = 0; i < prom_map_size; i++) { 766 prom_printf("start %016lx size %016lx tte %016lx\r\n", 767 (u_long)prom_map[i].vstart, 768 (u_long)prom_map[i].vsize, 769 (u_long)prom_map[i].tte); 770 } 771 prom_printf("End of prom xlations\r\n"); 772 } 773 #endif 774 775 /* 776 * Allocate a 64KB page for the cpu_info structure now. 777 */ 778 if ((cpu0paddr = prom_alloc_phys(numcpus * 8*NBPG, 8*NBPG)) == 0 ) { 779 prom_printf("Cannot allocate new cpu_info\r\n"); 780 OF_exit(); 781 } 782 783 /* 784 * Now the kernel text segment is in its final location we can try to 785 * find out how much memory really is free. 786 */ 787 sz = OF_getproplen(memh, "available") + sizeof(struct mem_region); 788 valloc(orig, struct mem_region, sz); 789 bzero((caddr_t)orig, sz); 790 if (OF_getprop(memh, "available", orig, sz) <= 0) { 791 prom_printf("no available RAM?"); 792 OF_exit(); 793 } 794 #ifdef DEBUG 795 if (pmapdebug & PDB_BOOT1) { 796 /* print out mem list */ 797 prom_printf("Available physical memory:\r\n"); 798 for (mp = orig; mp->size; mp++) { 799 prom_printf("memlist start %lx size %lx\r\n", 800 (u_long)mp->start, (u_long)mp->size); 801 } 802 prom_printf("End of available physical memory\r\n"); 803 } 804 #endif 805 valloc(avail, struct mem_region, sz); 806 bzero((caddr_t)avail, sz); 807 for (pcnt = 0, mp = orig, mp1 = avail; (mp1->size = mp->size); 808 mp++, mp1++) { 809 mp1->start = mp->start; 810 pcnt++; 811 } 812 813 /* 814 * Allocate and initialize a context table 815 */ 816 numctx = maxctx; 817 valloc(ctxbusy, paddr_t, CTXSIZE); 818 bzero((caddr_t)ctxbusy, CTXSIZE); 819 820 /* 821 * Allocate our TSB. 822 * 823 * We will use the left over space to flesh out the kernel pmap. 
824 */ 825 BDPRINTF(PDB_BOOT1, ("firstaddr before TSB=%lx\r\n", 826 (u_long)firstaddr)); 827 firstaddr = ((firstaddr + TSBSIZE - 1) & ~(TSBSIZE-1)); 828 #ifdef DEBUG 829 i = (firstaddr + (NBPG-1)) & ~(NBPG-1); /* First, page align */ 830 if ((int)firstaddr < i) { 831 prom_printf("TSB alloc fixup failed\r\n"); 832 prom_printf("frobbed i, firstaddr before TSB=%x, %lx\r\n", 833 (int)i, (u_long)firstaddr); 834 panic("TSB alloc"); 835 OF_exit(); 836 } 837 #endif 838 BDPRINTF(PDB_BOOT, ("frobbed i, firstaddr before TSB=%x, %lx\r\n", 839 (int)i, (u_long)firstaddr)); 840 valloc(tsb_dmmu, pte_t, TSBSIZE); 841 bzero(tsb_dmmu, TSBSIZE); 842 valloc(tsb_immu, pte_t, TSBSIZE); 843 bzero(tsb_immu, TSBSIZE); 844 845 BDPRINTF(PDB_BOOT1, ("firstaddr after TSB=%lx\r\n", (u_long)firstaddr)); 846 BDPRINTF(PDB_BOOT1, ("TSB allocated at %p size %08x\r\n", (void *)tsb_dmmu, 847 (int)TSBSIZE)); 848 849 #ifdef SUN4V 850 if (CPU_ISSUN4V) { 851 valloc(tsb_desc, struct tsb_desc, sizeof(struct tsb_desc)); 852 bzero(tsb_desc, sizeof(struct tsb_desc)); 853 tsb_desc->td_idxpgsz = 0; 854 tsb_desc->td_assoc = 1; 855 tsb_desc->td_size = TSBENTS; 856 tsb_desc->td_ctxidx = -1; 857 tsb_desc->td_pgsz = 0xf; 858 tsb_desc->td_pa = (paddr_t)tsb_dmmu + kdatap - kdata; 859 } 860 #endif 861 862 BDPRINTF(PDB_BOOT1, ("firstaddr after pmap=%08lx\r\n", 863 (u_long)firstaddr)); 864 865 /* 866 * Page align all regions. 867 * Non-page memory isn't very interesting to us. 868 * Also, sort the entries for ascending addresses. 869 * 870 * And convert from virtual to physical addresses. 871 */ 872 873 BDPRINTF(PDB_BOOT, ("kernel virtual size %08lx - %08lx\r\n", 874 (u_long)kernelstart, (u_long)firstaddr)); 875 kdata = kdata & ~PGOFSET; 876 ekdata = firstaddr; 877 ekdata = (ekdata + PGOFSET) & ~PGOFSET; 878 BDPRINTF(PDB_BOOT1, ("kernel virtual size %08lx - %08lx\r\n", 879 (u_long)kernelstart, (u_long)kernelend)); 880 ekdatap = ekdata - kdata + kdatap; 881 /* Switch from vaddrs to paddrs */ 882 if(ekdatap > (kdatap + 4*MEG)) { 883 prom_printf("Kernel size exceeds 4MB\r\n"); 884 } 885 886 #ifdef DEBUG 887 if (pmapdebug & PDB_BOOT1) { 888 /* print out mem list */ 889 prom_printf("Available %lx physical memory before cleanup:\r\n", 890 (u_long)avail); 891 for (mp = avail; mp->size; mp++) { 892 prom_printf("memlist start %lx size %lx\r\n", 893 (u_long)mp->start, 894 (u_long)mp->size); 895 } 896 prom_printf("End of available physical memory before cleanup\r\n"); 897 prom_printf("kernel physical text size %08lx - %08lx\r\n", 898 (u_long)ktextp, (u_long)ektextp); 899 prom_printf("kernel physical data size %08lx - %08lx\r\n", 900 (u_long)kdatap, (u_long)ekdatap); 901 } 902 #endif 903 /* 904 * Here's a another quick in-lined bubble sort. 905 */ 906 for (i = 0; i < pcnt; i++) { 907 for (j = i; j < pcnt; j++) { 908 if (avail[j].start < avail[i].start) { 909 struct mem_region tmp; 910 tmp = avail[i]; 911 avail[i] = avail[j]; 912 avail[j] = tmp; 913 } 914 } 915 } 916 917 /* Throw away page zero if we have it. */ 918 if (avail->start == 0) { 919 avail->start += NBPG; 920 avail->size -= NBPG; 921 } 922 /* 923 * Now we need to remove the area we valloc'ed from the available 924 * memory lists. (NB: we may have already alloc'ed the entire space). 925 */ 926 for (mp = avail; mp->size; mp++) { 927 /* 928 * Check whether this region holds all of the kernel. 
929 */ 930 s = mp->start + mp->size; 931 if (mp->start < kdatap && s > roundup(ekdatap, 4*MEG)) { 932 avail[pcnt].start = roundup(ekdatap, 4*MEG); 933 avail[pcnt++].size = s - kdatap; 934 mp->size = kdatap - mp->start; 935 } 936 /* 937 * Look whether this regions starts within the kernel. 938 */ 939 if (mp->start >= kdatap && 940 mp->start < roundup(ekdatap, 4*MEG)) { 941 s = ekdatap - mp->start; 942 if (mp->size > s) 943 mp->size -= s; 944 else 945 mp->size = 0; 946 mp->start = roundup(ekdatap, 4*MEG); 947 } 948 /* 949 * Now look whether this region ends within the kernel. 950 */ 951 s = mp->start + mp->size; 952 if (s > kdatap && s < roundup(ekdatap, 4*MEG)) 953 mp->size -= s - kdatap; 954 /* 955 * Now page align the start of the region. 956 */ 957 s = mp->start % NBPG; 958 if (mp->size >= s) { 959 mp->size -= s; 960 mp->start += s; 961 } 962 /* 963 * And now align the size of the region. 964 */ 965 mp->size -= mp->size % NBPG; 966 /* 967 * Check whether some memory is left here. 968 */ 969 if (mp->size == 0) { 970 bcopy(mp + 1, mp, 971 (pcnt - (mp - avail)) * sizeof *mp); 972 pcnt--; 973 mp--; 974 continue; 975 } 976 s = mp->start; 977 sz = mp->size; 978 for (mp1 = avail; mp1 < mp; mp1++) 979 if (s < mp1->start) 980 break; 981 if (mp1 < mp) { 982 bcopy(mp1, mp1 + 1, (char *)mp - (char *)mp1); 983 mp1->start = s; 984 mp1->size = sz; 985 } 986 /* 987 * In future we should be able to specify both allocated 988 * and free. 989 */ 990 uvm_page_physload( 991 atop(mp->start), 992 atop(mp->start+mp->size), 993 atop(mp->start), 994 atop(mp->start+mp->size), 0); 995 } 996 997 #if 0 998 /* finally, free up any space that valloc did not use */ 999 prom_unmap_virt((vaddr_t)ekdata, roundup(ekdata, 4*MEG) - ekdata); 1000 if (ekdatap < roundup(kdatap, 4*MEG))) { 1001 uvm_page_physload(atop(ekdatap), 1002 atop(roundup(ekdatap, (4*MEG))), 1003 atop(ekdatap), 1004 atop(roundup(ekdatap, (4*MEG))), 0); 1005 } 1006 #endif 1007 1008 #ifdef DEBUG 1009 if (pmapdebug & PDB_BOOT) { 1010 /* print out mem list */ 1011 prom_printf("Available physical memory after cleanup:\r\n"); 1012 for (mp = avail; mp->size; mp++) { 1013 prom_printf("avail start %lx size %lx\r\n", 1014 (long)mp->start, (long)mp->size); 1015 } 1016 prom_printf("End of available physical memory after cleanup\r\n"); 1017 } 1018 #endif 1019 /* 1020 * Allocate and clear out pmap_kernel()->pm_segs[] 1021 */ 1022 mtx_init(&pmap_kernel()->pm_mtx, IPL_VM); 1023 pmap_kernel()->pm_refs = 1; 1024 pmap_kernel()->pm_ctx = 0; 1025 { 1026 paddr_t newp; 1027 1028 do { 1029 pmap_get_page(&newp, NULL, pmap_kernel()); 1030 } while (!newp); /* Throw away page zero */ 1031 pmap_kernel()->pm_segs=(int64_t *)(u_long)newp; 1032 pmap_kernel()->pm_physaddr = newp; 1033 /* mark kernel context as busy */ 1034 ((paddr_t*)ctxbusy)[0] = pmap_kernel()->pm_physaddr; 1035 } 1036 /* 1037 * finish filling out kernel pmap. 
1038 */ 1039 1040 BDPRINTF(PDB_BOOT, ("pmap_kernel()->pm_physaddr = %lx\r\n", 1041 (long)pmap_kernel()->pm_physaddr)); 1042 /* 1043 * Tell pmap about our mesgbuf -- Hope this works already 1044 */ 1045 #ifdef DEBUG 1046 BDPRINTF(PDB_BOOT1, ("Calling consinit()\r\n")); 1047 if (pmapdebug & PDB_BOOT1) consinit(); 1048 BDPRINTF(PDB_BOOT1, ("Inserting mesgbuf into pmap_kernel()\r\n")); 1049 #endif 1050 /* it's not safe to call pmap_enter so we need to do this ourselves */ 1051 va = (vaddr_t)msgbufp; 1052 prom_map_phys(phys_msgbuf, msgbufsiz, (vaddr_t)msgbufp, -1); 1053 while (msgbufsiz) { 1054 data = TSB_DATA(0 /* global */, 1055 PGSZ_8K, 1056 phys_msgbuf, 1057 1 /* priv */, 1058 1 /* Write */, 1059 1 /* Cacheable */, 1060 0 /* ALIAS -- Disable D$ */, 1061 1 /* valid */, 1062 0 /* IE */); 1063 pmap_enter_kpage(va, data); 1064 va += PAGE_SIZE; 1065 msgbufsiz -= PAGE_SIZE; 1066 phys_msgbuf += PAGE_SIZE; 1067 } 1068 BDPRINTF(PDB_BOOT1, ("Done inserting mesgbuf into pmap_kernel()\r\n")); 1069 1070 BDPRINTF(PDB_BOOT1, ("Inserting PROM mappings into pmap_kernel()\r\n")); 1071 data = (CPU_ISSUN4V ? SUN4V_TLB_EXEC : SUN4U_TLB_EXEC); 1072 for (i = 0; i < prom_map_size; i++) { 1073 if (prom_map[i].vstart && ((prom_map[i].vstart>>32) == 0)) { 1074 for (j = 0; j < prom_map[i].vsize; j += NBPG) { 1075 int k; 1076 uint64_t tte; 1077 1078 for (k = 0; page_size_map[k].mask; k++) { 1079 if (((prom_map[i].vstart | 1080 prom_map[i].tte) & 1081 page_size_map[k].mask) == 0 && 1082 page_size_map[k].mask < 1083 prom_map[i].vsize) 1084 break; 1085 } 1086 /* Enter PROM map into pmap_kernel() */ 1087 tte = prom_map[i].tte; 1088 if (CPU_ISSUN4V) 1089 tte &= ~SUN4V_TLB_SOFT_MASK; 1090 else 1091 tte &= ~(SUN4U_TLB_SOFT2_MASK | 1092 SUN4U_TLB_SOFT_MASK); 1093 pmap_enter_kpage(prom_map[i].vstart + j, 1094 (tte + j) | data | page_size_map[k].code); 1095 } 1096 } 1097 } 1098 BDPRINTF(PDB_BOOT1, ("Done inserting PROM mappings into pmap_kernel()\r\n")); 1099 1100 /* 1101 * Fix up start of kernel heap. 1102 */ 1103 vmmap = (vaddr_t)roundup(ekdata, 4*MEG); 1104 /* Let's keep 1 page of redzone after the kernel */ 1105 vmmap += NBPG; 1106 { 1107 extern vaddr_t u0[2]; 1108 extern struct pcb* proc0paddr; 1109 extern void main(void); 1110 paddr_t pa; 1111 1112 /* Initialize all the pointers to u0 */ 1113 u0[0] = vmmap; 1114 /* Allocate some VAs for u0 */ 1115 u0[1] = vmmap + 2*USPACE; 1116 1117 BDPRINTF(PDB_BOOT1, 1118 ("Inserting stack 0 into pmap_kernel() at %p\r\n", 1119 vmmap)); 1120 1121 while (vmmap < u0[1]) { 1122 int64_t data; 1123 1124 pmap_get_page(&pa, NULL, pmap_kernel()); 1125 prom_map_phys(pa, NBPG, vmmap, -1); 1126 data = TSB_DATA(0 /* global */, 1127 PGSZ_8K, 1128 pa, 1129 1 /* priv */, 1130 1 /* Write */, 1131 1 /* Cacheable */, 1132 0 /* ALIAS -- Disable D$ */, 1133 1 /* valid */, 1134 0 /* IE */); 1135 pmap_enter_kpage(vmmap, data); 1136 vmmap += NBPG; 1137 } 1138 BDPRINTF(PDB_BOOT1, 1139 ("Done inserting stack 0 into pmap_kernel()\r\n")); 1140 1141 /* Now map in and initialize our cpu_info structure */ 1142 #ifdef DIAGNOSTIC 1143 vmmap += NBPG; /* redzone -- XXXX do we need one? */ 1144 #endif 1145 intstk = vmmap = roundup(vmmap, 64*KB); 1146 cpus = (struct cpu_info *)(intstk + CPUINFO_VA - INTSTACK); 1147 1148 BDPRINTF(PDB_BOOT1, 1149 ("Inserting cpu_info into pmap_kernel() at %p\r\n", 1150 cpus)); 1151 /* Now map in all 8 pages of cpu_info */ 1152 pa = cpu0paddr; 1153 prom_map_phys(pa, 64*KB, vmmap, -1); 1154 /* 1155 * Also map it in as the interrupt stack. 1156 * This lets the PROM see this if needed. 
1157 * 1158 * XXXX locore.s does not flush these mappings 1159 * before installing the locked TTE. 1160 */ 1161 prom_map_phys(pa, 64*KB, CPUINFO_VA, -1); 1162 for (i=0; i<8; i++) { 1163 int64_t data; 1164 1165 data = TSB_DATA(0 /* global */, 1166 PGSZ_8K, 1167 pa, 1168 1 /* priv */, 1169 1 /* Write */, 1170 1 /* Cacheable */, 1171 0 /* ALIAS -- Disable D$ */, 1172 1 /* valid */, 1173 0 /* IE */); 1174 pmap_enter_kpage(vmmap, data); 1175 vmmap += NBPG; 1176 pa += NBPG; 1177 } 1178 BDPRINTF(PDB_BOOT1, ("Initializing cpu_info\r\n")); 1179 1180 /* Initialize our cpu_info structure */ 1181 bzero((void *)intstk, 8*NBPG); 1182 cpus->ci_self = cpus; 1183 cpus->ci_next = NULL; /* Redundant, I know. */ 1184 cpus->ci_curproc = &proc0; 1185 cpus->ci_cpcb = (struct pcb *)u0[0]; /* Need better source */ 1186 cpus->ci_upaid = cpu_myid(); 1187 cpus->ci_cpuid = 0; 1188 cpus->ci_flags = CPUF_RUNNING; 1189 cpus->ci_fpproc = NULL; 1190 cpus->ci_spinup = main; /* Call main when we're running. */ 1191 cpus->ci_initstack = (void *)u0[1]; 1192 cpus->ci_paddr = cpu0paddr; 1193 #ifdef SUN4V 1194 cpus->ci_mmfsa = cpu0paddr; 1195 #endif 1196 proc0paddr = cpus->ci_cpcb; 1197 1198 cpu0paddr += 64 * KB; 1199 1200 /* The rest will be done at CPU attach time. */ 1201 BDPRINTF(PDB_BOOT1, 1202 ("Done inserting cpu_info into pmap_kernel()\r\n")); 1203 } 1204 1205 vmmap = (vaddr_t)reserve_dumppages((caddr_t)(u_long)vmmap); 1206 BDPRINTF(PDB_BOOT1, ("Finished pmap_bootstrap()\r\n")); 1207 1208 pmap_bootstrap_cpu(cpus->ci_paddr); 1209 } 1210 1211 void sun4u_bootstrap_cpu(paddr_t); 1212 void sun4v_bootstrap_cpu(paddr_t); 1213 1214 void 1215 pmap_bootstrap_cpu(paddr_t intstack) 1216 { 1217 if (CPU_ISSUN4V) 1218 sun4v_bootstrap_cpu(intstack); 1219 else 1220 sun4u_bootstrap_cpu(intstack); 1221 } 1222 1223 extern void sun4u_set_tsbs(void); 1224 1225 void 1226 sun4u_bootstrap_cpu(paddr_t intstack) 1227 { 1228 u_int64_t data; 1229 paddr_t pa; 1230 vaddr_t va; 1231 int index; 1232 int impl; 1233 1234 impl = (getver() & VER_IMPL) >> VER_IMPL_SHIFT; 1235 1236 /* 1237 * Establish the 4MB locked mappings for kernel data and text. 1238 * 1239 * The text segment needs to be mapped into the DTLB too, 1240 * because of .rodata. 1241 */ 1242 1243 index = 15; /* XXX */ 1244 for (va = ktext, pa = ktextp; va < ektext; va += 4*MEG, pa += 4*MEG) { 1245 data = SUN4U_TSB_DATA(0, PGSZ_4M, pa, 1, 0, 1, 0, 1, 0); 1246 data |= SUN4U_TLB_L; 1247 prom_itlb_load(index, data, va); 1248 prom_dtlb_load(index, data, va); 1249 index--; 1250 } 1251 1252 for (va = kdata, pa = kdatap; va < ekdata; va += 4*MEG, pa += 4*MEG) { 1253 data = SUN4U_TSB_DATA(0, PGSZ_4M, pa, 1, 1, 1, 0, 1, 0); 1254 data |= SUN4U_TLB_L; 1255 prom_dtlb_load(index, data, va); 1256 index--; 1257 } 1258 1259 #ifdef MULTIPROCESSOR 1260 if (impl >= IMPL_OLYMPUS_C && impl <= IMPL_JUPITER) { 1261 /* 1262 * On SPARC64-VI and SPARC64-VII processors, the MMU is 1263 * shared between threads, so we can't establish a locked 1264 * mapping for the interrupt stack since the mappings would 1265 * conflict. Instead we stick the address in a scratch 1266 * register, like we do for sun4v. 
1267 */ 1268 pa = intstack + (CPUINFO_VA - INTSTACK); 1269 pa += offsetof(struct cpu_info, ci_self); 1270 va = ldxa(pa, ASI_PHYS_CACHED); 1271 stxa(0x00, ASI_SCRATCH, va); 1272 1273 if ((CPU_JUPITERID % 2) == 1) 1274 index--; 1275 1276 data = SUN4U_TSB_DATA(0, PGSZ_64K, intstack, 1, 1, 1, 0, 1, 0); 1277 data |= SUN4U_TLB_L; 1278 prom_dtlb_load(index, data, va - (CPUINFO_VA - INTSTACK)); 1279 1280 sun4u_set_tsbs(); 1281 return; 1282 } 1283 #endif 1284 1285 /* 1286 * Establish the 64KB locked mapping for the interrupt stack. 1287 */ 1288 1289 data = SUN4U_TSB_DATA(0, PGSZ_64K, intstack, 1, 1, 1, 0, 1, 0); 1290 data |= SUN4U_TLB_L; 1291 prom_dtlb_load(index, data, INTSTACK); 1292 1293 sun4u_set_tsbs(); 1294 } 1295 1296 void 1297 sun4v_bootstrap_cpu(paddr_t intstack) 1298 { 1299 #ifdef SUN4V 1300 u_int64_t data; 1301 paddr_t pa; 1302 vaddr_t va; 1303 int err; 1304 1305 /* 1306 * Establish the 4MB locked mappings for kernel data and text. 1307 * 1308 * The text segment needs to be mapped into the DTLB too, 1309 * because of .rodata. 1310 */ 1311 1312 for (va = ktext, pa = ktextp; va < ektext; va += 4*MEG, pa += 4*MEG) { 1313 data = SUN4V_TSB_DATA(0, PGSZ_4M, pa, 1, 0, 1, 0, 1, 0); 1314 data |= SUN4V_TLB_X; 1315 err = hv_mmu_map_perm_addr(va, data, MAP_ITLB|MAP_DTLB); 1316 if (err != H_EOK) 1317 prom_printf("err: %d\r\n", err); 1318 } 1319 1320 for (va = kdata, pa = kdatap; va < ekdata; va += 4*MEG, pa += 4*MEG) { 1321 data = SUN4V_TSB_DATA(0, PGSZ_4M, pa, 1, 1, 1, 0, 1, 0); 1322 err = hv_mmu_map_perm_addr(va, data, MAP_DTLB); 1323 if (err != H_EOK) 1324 prom_printf("err: %d\r\n", err); 1325 } 1326 1327 #ifndef MULTIPROCESSOR 1328 /* 1329 * Establish the 64KB locked mapping for the interrupt stack. 1330 */ 1331 data = SUN4V_TSB_DATA(0, PGSZ_64K, intstack, 1, 1, 1, 0, 1, 0); 1332 err = hv_mmu_map_perm_addr(INTSTACK, data, MAP_DTLB); 1333 if (err != H_EOK) 1334 prom_printf("err: %d\r\n", err); 1335 #else 1336 pa = intstack + (CPUINFO_VA - INTSTACK); 1337 pa += offsetof(struct cpu_info, ci_self); 1338 stxa(0x00, ASI_SCRATCHPAD, ldxa(pa, ASI_PHYS_CACHED)); 1339 #endif 1340 1341 stxa(0x10, ASI_SCRATCHPAD, intstack + (CPUINFO_VA - INTSTACK)); 1342 1343 err = hv_mmu_tsb_ctx0(1, (paddr_t)tsb_desc + kdatap - kdata); 1344 if (err != H_EOK) 1345 prom_printf("err: %d\r\n", err); 1346 err = hv_mmu_tsb_ctxnon0(1, (paddr_t)tsb_desc + kdatap - kdata); 1347 if (err != H_EOK) 1348 prom_printf("err: %d\r\n", err); 1349 #endif 1350 } 1351 1352 /* 1353 * Initialize anything else for pmap handling. 1354 * Called during uvm_init(). 1355 */ 1356 void 1357 pmap_init(void) 1358 { 1359 BDPRINTF(PDB_BOOT1, ("pmap_init()\r\n")); 1360 if (PAGE_SIZE != NBPG) 1361 panic("pmap_init: CLSIZE!=1"); 1362 1363 /* Setup a pool for additional pvlist structures */ 1364 pool_init(&pv_pool, sizeof(struct pv_entry), 0, IPL_VM, 0, 1365 "pv_entry", NULL); 1366 pool_init(&pmap_pool, sizeof(struct pmap), 0, IPL_NONE, 0, 1367 "pmappl", NULL); 1368 } 1369 1370 /* Start of non-cachable physical memory on UltraSPARC-III. */ 1371 #define VM_MAXPHYS_ADDRESS ((vaddr_t)0x0000040000000000L) 1372 1373 static vaddr_t kbreak; /* End of kernel VA */ 1374 1375 /* 1376 * How much virtual space is available to the kernel? 1377 */ 1378 void 1379 pmap_virtual_space(vaddr_t *start, vaddr_t *end) 1380 { 1381 /* 1382 * Make sure virtual memory and physical memory don't overlap 1383 * to avoid problems with ASI_PHYS_CACHED on UltraSPARC-III. 
1384 */ 1385 if (vmmap < VM_MAXPHYS_ADDRESS) 1386 vmmap = VM_MAXPHYS_ADDRESS; 1387 1388 /* Reserve two pages for pmap_copy_page && /dev/mem */ 1389 *start = kbreak = (vaddr_t)(vmmap + 2*NBPG); 1390 *end = VM_MAX_KERNEL_ADDRESS; 1391 BDPRINTF(PDB_BOOT1, ("pmap_virtual_space: %x-%x\r\n", *start, *end)); 1392 } 1393 1394 /* 1395 * Preallocate kernel page tables to a specified VA. 1396 * This simply loops through the first TTE for each 1397 * page table from the beginning of the kernel pmap, 1398 * reads the entry, and if the result is 1399 * zero (either invalid entry or no page table) it stores 1400 * a zero there, populating page tables in the process. 1401 * This is not the most efficient technique but i don't 1402 * expect it to be called that often. 1403 */ 1404 vaddr_t 1405 pmap_growkernel(vaddr_t maxkvaddr) 1406 { 1407 paddr_t pg; 1408 struct pmap *pm = pmap_kernel(); 1409 1410 if (maxkvaddr >= VM_MAX_KERNEL_ADDRESS) { 1411 printf("WARNING: cannot extend kernel pmap beyond %p to %p\n", 1412 (void *)VM_MAX_KERNEL_ADDRESS, (void *)maxkvaddr); 1413 return (kbreak); 1414 } 1415 1416 /* Align with the start of a page table */ 1417 for (kbreak &= (-1<<PDSHIFT); kbreak < maxkvaddr; 1418 kbreak += (1<<PDSHIFT)) { 1419 if (pseg_get(pm, kbreak)) 1420 continue; 1421 1422 pg = 0; 1423 while (pseg_set(pm, kbreak, 0, pg) == 1) { 1424 pg = 0; 1425 pmap_get_page(&pg, "growk", pm); 1426 } 1427 1428 } 1429 1430 return (kbreak); 1431 } 1432 1433 /* 1434 * Create and return a physical map. 1435 */ 1436 struct pmap * 1437 pmap_create(void) 1438 { 1439 struct pmap *pm; 1440 1441 pm = pool_get(&pmap_pool, PR_WAITOK | PR_ZERO); 1442 1443 mtx_init(&pm->pm_mtx, IPL_VM); 1444 pm->pm_refs = 1; 1445 pmap_get_page(&pm->pm_physaddr, "pmap_create", pm); 1446 pm->pm_segs = (int64_t *)(u_long)pm->pm_physaddr; 1447 ctx_alloc(pm); 1448 1449 return (pm); 1450 } 1451 1452 /* 1453 * Add a reference to the given pmap. 1454 */ 1455 void 1456 pmap_reference(struct pmap *pm) 1457 { 1458 atomic_inc_int(&pm->pm_refs); 1459 } 1460 1461 /* 1462 * Retire the given pmap from service. 1463 * Should only be called if the map contains no valid mappings. 1464 */ 1465 void 1466 pmap_destroy(struct pmap *pm) 1467 { 1468 if (atomic_dec_int_nv(&pm->pm_refs) == 0) { 1469 pmap_release(pm); 1470 pool_put(&pmap_pool, pm); 1471 } 1472 } 1473 1474 /* 1475 * Release any resources held by the given physical map. 1476 * Called when a pmap initialized by pmap_pinit is being released. 
1477 */ 1478 void 1479 pmap_release(struct pmap *pm) 1480 { 1481 int i, j, k; 1482 paddr_t *pdir, *ptbl, tmp; 1483 1484 #ifdef DIAGNOSTIC 1485 if(pm == pmap_kernel()) 1486 panic("pmap_release: releasing pmap_kernel()"); 1487 #endif 1488 1489 mtx_enter(&pm->pm_mtx); 1490 for(i=0; i<STSZ; i++) { 1491 paddr_t psegentp = (paddr_t)(u_long)&pm->pm_segs[i]; 1492 if((pdir = (paddr_t *)(u_long)ldxa((vaddr_t)psegentp, 1493 ASI_PHYS_CACHED))) { 1494 for (k=0; k<PDSZ; k++) { 1495 paddr_t pdirentp = (paddr_t)(u_long)&pdir[k]; 1496 if ((ptbl = (paddr_t *)(u_long)ldxa( 1497 (vaddr_t)pdirentp, ASI_PHYS_CACHED))) { 1498 for (j=0; j<PTSZ; j++) { 1499 int64_t data; 1500 paddr_t pa; 1501 pv_entry_t pv; 1502 1503 data = ldxa((vaddr_t)&ptbl[j], 1504 ASI_PHYS_CACHED); 1505 if (!(data & TLB_V)) 1506 continue; 1507 pa = data & TLB_PA_MASK; 1508 pv = pa_to_pvh(pa); 1509 if (pv != NULL) { 1510 printf("pmap_release: pm=%p page %llx still in use\n", pm, 1511 (unsigned long long)(((u_int64_t)i<<STSHIFT)|((u_int64_t)k<<PDSHIFT)|((u_int64_t)j<<PTSHIFT))); 1512 db_enter(); 1513 } 1514 } 1515 stxa(pdirentp, ASI_PHYS_CACHED, 0); 1516 pmap_free_page((paddr_t)ptbl, pm); 1517 } 1518 } 1519 stxa(psegentp, ASI_PHYS_CACHED, 0); 1520 pmap_free_page((paddr_t)pdir, pm); 1521 } 1522 } 1523 tmp = (paddr_t)(u_long)pm->pm_segs; 1524 pm->pm_segs = NULL; 1525 pmap_free_page(tmp, pm); 1526 mtx_leave(&pm->pm_mtx); 1527 ctx_free(pm); 1528 } 1529 1530 /* 1531 * Garbage collects the physical map system for 1532 * pages which are no longer used. 1533 * Success need not be guaranteed -- that is, there 1534 * may well be pages which are not referenced, but 1535 * others may be collected. 1536 * Called by the pageout daemon when pages are scarce. 1537 */ 1538 void 1539 pmap_collect(struct pmap *pm) 1540 { 1541 int i, j, k, n, m, s; 1542 paddr_t *pdir, *ptbl; 1543 /* This is a good place to scan the pmaps for page tables with 1544 * no valid mappings in them and free them. */ 1545 1546 /* NEVER GARBAGE COLLECT THE KERNEL PMAP */ 1547 if (pm == pmap_kernel()) 1548 return; 1549 1550 s = splvm(); 1551 for (i=0; i<STSZ; i++) { 1552 if ((pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i], ASI_PHYS_CACHED))) { 1553 m = 0; 1554 for (k=0; k<PDSZ; k++) { 1555 if ((ptbl = (paddr_t *)(u_long)ldxa((vaddr_t)&pdir[k], ASI_PHYS_CACHED))) { 1556 m++; 1557 n = 0; 1558 for (j=0; j<PTSZ; j++) { 1559 int64_t data = ldxa((vaddr_t)&ptbl[j], ASI_PHYS_CACHED); 1560 if (data&TLB_V) 1561 n++; 1562 } 1563 if (!n) { 1564 /* Free the damn thing */ 1565 stxa((paddr_t)(u_long)&pdir[k], ASI_PHYS_CACHED, 0); 1566 pmap_free_page((paddr_t)ptbl, pm); 1567 } 1568 } 1569 } 1570 if (!m) { 1571 /* Free the damn thing */ 1572 stxa((paddr_t)(u_long)&pm->pm_segs[i], ASI_PHYS_CACHED, 0); 1573 pmap_free_page((paddr_t)pdir, pm); 1574 } 1575 } 1576 } 1577 splx(s); 1578 } 1579 1580 void 1581 pmap_zero_page(struct vm_page *pg) 1582 { 1583 pmap_zero_phys(VM_PAGE_TO_PHYS(pg)); 1584 } 1585 1586 void 1587 pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg) 1588 { 1589 paddr_t src = VM_PAGE_TO_PHYS(srcpg); 1590 paddr_t dst = VM_PAGE_TO_PHYS(dstpg); 1591 1592 pmap_copy_phys(src, dst); 1593 } 1594 1595 /* 1596 * Activate the address space for the specified process. If the 1597 * process is the current process, load the new MMU context. 
1598 */ 1599 void 1600 pmap_activate(struct proc *p) 1601 { 1602 struct pmap *pmap = p->p_vmspace->vm_map.pmap; 1603 int s; 1604 1605 /* 1606 * This is essentially the same thing that happens in cpu_switch() 1607 * when the newly selected process is about to run, except that we 1608 * have to make sure to clean the register windows before we set 1609 * the new context. 1610 */ 1611 1612 s = splvm(); 1613 if (p == curproc) { 1614 write_user_windows(); 1615 if (pmap->pm_ctx == 0) 1616 ctx_alloc(pmap); 1617 if (CPU_ISSUN4V) 1618 stxa(CTX_SECONDARY, ASI_MMU_CONTEXTID, pmap->pm_ctx); 1619 else 1620 stxa(CTX_SECONDARY, ASI_DMMU, pmap->pm_ctx); 1621 } 1622 splx(s); 1623 } 1624 1625 /* 1626 * Deactivate the address space of the specified process. 1627 */ 1628 void 1629 pmap_deactivate(struct proc *p) 1630 { 1631 } 1632 1633 /* 1634 * pmap_kenter_pa: [ INTERFACE ] 1635 * 1636 * Enter a va -> pa mapping into the kernel pmap without any 1637 * physical->virtual tracking. 1638 * 1639 * Note: no locking is necessary in this function. 1640 */ 1641 void 1642 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot) 1643 { 1644 struct pmap *pm = pmap_kernel(); 1645 pte_t tte; 1646 1647 KDASSERT(va < INTSTACK || va > EINTSTACK); 1648 KDASSERT(va < kdata || va > ekdata); 1649 1650 #ifdef DIAGNOSTIC 1651 if (pa & (PMAP_NVC|PMAP_NC|PMAP_LITTLE)) 1652 panic("%s: illegal cache flags 0x%lx", __func__, pa); 1653 #endif 1654 1655 /* 1656 * Construct the TTE. 1657 */ 1658 tte.tag = TSB_TAG(0, pm->pm_ctx,va); 1659 if (CPU_ISSUN4V) { 1660 tte.data = SUN4V_TSB_DATA(0, PGSZ_8K, pa, 1 /* Privileged */, 1661 (PROT_WRITE & prot), 1, 0, 1, 0); 1662 /* 1663 * We don't track modification on kenter mappings. 1664 */ 1665 if (prot & PROT_WRITE) 1666 tte.data |= SUN4V_TLB_REAL_W|SUN4V_TLB_W; 1667 if (prot & PROT_EXEC) 1668 tte.data |= SUN4V_TLB_EXEC; 1669 tte.data |= SUN4V_TLB_TSB_LOCK; /* wired */ 1670 } else { 1671 tte.data = SUN4U_TSB_DATA(0, PGSZ_8K, pa, 1 /* Privileged */, 1672 (PROT_WRITE & prot), 1, 0, 1, 0); 1673 /* 1674 * We don't track modification on kenter mappings. 1675 */ 1676 if (prot & PROT_WRITE) 1677 tte.data |= SUN4U_TLB_REAL_W|SUN4U_TLB_W; 1678 if (prot & PROT_EXEC) 1679 tte.data |= SUN4U_TLB_EXEC; 1680 if (prot == PROT_EXEC) 1681 tte.data |= SUN4U_TLB_EXEC_ONLY; 1682 tte.data |= SUN4U_TLB_TSB_LOCK; /* wired */ 1683 } 1684 KDASSERT((tte.data & TLB_NFO) == 0); 1685 1686 /* Kernel page tables are pre-allocated. */ 1687 if (pseg_set(pm, va, tte.data, 0) != 0) 1688 panic("%s: no pseg", __func__); 1689 1690 /* this is correct */ 1691 dcache_flush_page(pa); 1692 } 1693 1694 /* 1695 * pmap_kremove: [ INTERFACE ] 1696 * 1697 * Remove a mapping entered with pmap_kenter_pa() starting at va, 1698 * for size bytes (assumed to be page rounded). 1699 */ 1700 void 1701 pmap_kremove(vaddr_t va, vsize_t size) 1702 { 1703 struct pmap *pm = pmap_kernel(); 1704 1705 KDASSERT(va < INTSTACK || va > EINTSTACK); 1706 KDASSERT(va < kdata || va > ekdata); 1707 1708 while (size >= NBPG) { 1709 /* 1710 * Is this part of the permanent 4MB mapping? 1711 */ 1712 #ifdef DIAGNOSTIC 1713 if (pm == pmap_kernel() && 1714 (va >= ktext && va < roundup(ekdata, 4*MEG))) 1715 panic("%s: va=0x%lx in locked TLB", __func__, va); 1716 #endif 1717 /* Shouldn't need to do this if the entry's not valid. */ 1718 if (pseg_get(pm, va)) { 1719 /* We need to flip the valid bit and clear the access statistics. */ 1720 if (pseg_set(pm, va, 0, 0)) { 1721 printf("pmap_kremove: gotten pseg empty!\n"); 1722 db_enter(); 1723 /* panic? 
*/ 1724 } 1725 1726 tsb_invalidate(pm->pm_ctx, va); 1727 /* Here we assume nothing can get into the TLB unless it has a PTE */ 1728 tlb_flush_pte(va, pm->pm_ctx); 1729 } 1730 va += NBPG; 1731 size -= NBPG; 1732 } 1733 } 1734 1735 /* 1736 * Insert physical page at pa into the given pmap at virtual address va. 1737 * Supports 64-bit pa so we can map I/O space. 1738 */ 1739 int 1740 pmap_enter(struct pmap *pm, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags) 1741 { 1742 pte_t tte; 1743 paddr_t pg; 1744 int aliased = 0; 1745 pv_entry_t pv, npv; 1746 int size = 0; /* PMAP_SZ_TO_TTE(pa); */ 1747 boolean_t wired = (flags & PMAP_WIRED) != 0; 1748 1749 /* 1750 * Is this part of the permanent mappings? 1751 */ 1752 KDASSERT(pm != pmap_kernel() || va < INTSTACK || va > EINTSTACK); 1753 KDASSERT(pm != pmap_kernel() || va < kdata || va > ekdata); 1754 1755 npv = pool_get(&pv_pool, PR_NOWAIT); 1756 if (npv == NULL && (flags & PMAP_CANFAIL)) 1757 return (ENOMEM); 1758 1759 /* 1760 * XXXX If a mapping at this address already exists, remove it. 1761 */ 1762 mtx_enter(&pm->pm_mtx); 1763 tte.data = pseg_get(pm, va); 1764 if (tte.data & TLB_V) { 1765 mtx_leave(&pm->pm_mtx); 1766 pmap_remove(pm, va, va + NBPG-1); 1767 mtx_enter(&pm->pm_mtx); 1768 tte.data = pseg_get(pm, va); 1769 } 1770 1771 /* 1772 * Construct the TTE. 1773 */ 1774 pv = pa_to_pvh(pa); 1775 if (pv != NULL) { 1776 struct vm_page *pg = PHYS_TO_VM_PAGE(pa); 1777 1778 mtx_enter(&pg->mdpage.pvmtx); 1779 aliased = (pv->pv_va & PV_ALIAS); 1780 #ifdef DIAGNOSTIC 1781 if ((flags & PROT_MASK) & ~prot) 1782 panic("pmap_enter: access_type exceeds prot"); 1783 #endif 1784 /* If we don't have the traphandler do it, set the ref/mod bits now */ 1785 if (flags & PROT_MASK) 1786 pv->pv_va |= PV_REF; 1787 if (flags & PROT_WRITE) 1788 pv->pv_va |= PV_MOD; 1789 pv->pv_va |= pmap_tte2flags(tte.data); 1790 mtx_leave(&pg->mdpage.pvmtx); 1791 } else { 1792 aliased = 0; 1793 } 1794 if (pa & PMAP_NVC) 1795 aliased = 1; 1796 if (CPU_ISSUN4V) { 1797 tte.data = SUN4V_TSB_DATA(0, size, pa, pm == pmap_kernel(), 1798 (flags & PROT_WRITE), (!(pa & PMAP_NC)), 1799 aliased, 1, (pa & PMAP_LITTLE)); 1800 if (prot & PROT_WRITE) 1801 tte.data |= SUN4V_TLB_REAL_W; 1802 if (prot & PROT_EXEC) 1803 tte.data |= SUN4V_TLB_EXEC; 1804 if (wired) 1805 tte.data |= SUN4V_TLB_TSB_LOCK; 1806 } else { 1807 tte.data = SUN4U_TSB_DATA(0, size, pa, pm == pmap_kernel(), 1808 (flags & PROT_WRITE), (!(pa & PMAP_NC)), 1809 aliased, 1, (pa & PMAP_LITTLE)); 1810 if (prot & PROT_WRITE) 1811 tte.data |= SUN4U_TLB_REAL_W; 1812 if (prot & PROT_EXEC) 1813 tte.data |= SUN4U_TLB_EXEC; 1814 if (prot == PROT_EXEC) 1815 tte.data |= SUN4U_TLB_EXEC_ONLY; 1816 if (wired) 1817 tte.data |= SUN4U_TLB_TSB_LOCK; 1818 } 1819 KDASSERT((tte.data & TLB_NFO) == 0); 1820 1821 pg = 0; 1822 while (pseg_set(pm, va, tte.data, pg) == 1) { 1823 pg = 0; 1824 if (!pmap_get_page(&pg, NULL, pm)) { 1825 if ((flags & PMAP_CANFAIL) == 0) 1826 panic("pmap_enter: no memory"); 1827 mtx_leave(&pm->pm_mtx); 1828 if (npv != NULL) 1829 pool_put(&pv_pool, npv); 1830 return (ENOMEM); 1831 } 1832 } 1833 1834 if (pv != NULL) 1835 npv = pmap_enter_pv(pm, npv, va, pa); 1836 atomic_inc_long(&pm->pm_stats.resident_count); 1837 mtx_leave(&pm->pm_mtx); 1838 if (pm->pm_ctx || pm == pmap_kernel()) { 1839 tsb_invalidate(pm->pm_ctx, va); 1840 1841 /* Force reload -- protections may be changed */ 1842 tlb_flush_pte(va, pm->pm_ctx); 1843 } 1844 /* this is correct */ 1845 dcache_flush_page(pa); 1846 1847 if (npv != NULL) 1848 pool_put(&pv_pool, npv); 1849 1850 /* 
We will let the fast mmu miss interrupt load the new translation */ 1851 return 0; 1852 } 1853 1854 /* 1855 * Remove the given range of mapping entries. 1856 */ 1857 void 1858 pmap_remove(struct pmap *pm, vaddr_t va, vaddr_t endva) 1859 { 1860 pv_entry_t pv, freepvs = NULL; 1861 int flush = 0; 1862 int64_t data; 1863 vaddr_t flushva = va; 1864 1865 /* 1866 * In here we should check each pseg and if there are no more entries, 1867 * free it. It's just that linear scans of 8K pages gets expensive. 1868 */ 1869 1870 KDASSERT(pm != pmap_kernel() || endva < INTSTACK || va > EINTSTACK); 1871 KDASSERT(pm != pmap_kernel() || endva < kdata || va > ekdata); 1872 1873 mtx_enter(&pm->pm_mtx); 1874 1875 /* Now do the real work */ 1876 while (va < endva) { 1877 /* 1878 * Is this part of the permanent 4MB mapping? 1879 */ 1880 #ifdef DIAGNOSTIC 1881 if (pm == pmap_kernel() && va >= ktext && 1882 va < roundup(ekdata, 4*MEG)) 1883 panic("pmap_remove: va=%08x in locked TLB", (u_int)va); 1884 #endif 1885 /* We don't really need to do this if the valid bit is not set... */ 1886 if ((data = pseg_get(pm, va)) && (data & TLB_V) != 0) { 1887 paddr_t entry; 1888 1889 flush |= 1; 1890 /* First remove it from the pv_table */ 1891 entry = (data & TLB_PA_MASK); 1892 pv = pa_to_pvh(entry); 1893 if (pv != NULL) { 1894 pv = pmap_remove_pv(pm, va, entry); 1895 if (pv != NULL) { 1896 pv->pv_next = freepvs; 1897 freepvs = pv; 1898 } 1899 } 1900 /* We need to flip the valid bit and clear the access statistics. */ 1901 if (pseg_set(pm, va, 0, 0)) { 1902 printf("pmap_remove: gotten pseg empty!\n"); 1903 db_enter(); 1904 /* panic? */ 1905 } 1906 atomic_dec_long(&pm->pm_stats.resident_count); 1907 if (!pm->pm_ctx && pm != pmap_kernel()) 1908 continue; 1909 tsb_invalidate(pm->pm_ctx, va); 1910 /* Here we assume nothing can get into the TLB unless it has a PTE */ 1911 tlb_flush_pte(va, pm->pm_ctx); 1912 } 1913 va += NBPG; 1914 } 1915 1916 mtx_leave(&pm->pm_mtx); 1917 1918 while ((pv = freepvs) != NULL) { 1919 freepvs = pv->pv_next; 1920 pool_put(&pv_pool, pv); 1921 } 1922 1923 if (flush) 1924 cache_flush_virt(flushva, endva - flushva); 1925 } 1926 1927 /* 1928 * Change the protection on the specified range of this pmap. 1929 */ 1930 void 1931 pmap_protect(struct pmap *pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot) 1932 { 1933 paddr_t pa; 1934 pv_entry_t pv; 1935 int64_t data; 1936 1937 KDASSERT(pm != pmap_kernel() || eva < INTSTACK || sva > EINTSTACK); 1938 KDASSERT(pm != pmap_kernel() || eva < kdata || sva > ekdata); 1939 1940 if ((prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC)) 1941 return; 1942 1943 if (prot == PROT_NONE) { 1944 pmap_remove(pm, sva, eva); 1945 return; 1946 } 1947 1948 mtx_enter(&pm->pm_mtx); 1949 sva = sva & ~PGOFSET; 1950 while (sva < eva) { 1951 /* 1952 * Is this part of the permanent 4MB mapping? 

/*
 * Change the protection on the specified range of this pmap.
 */
void
pmap_protect(struct pmap *pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
{
	paddr_t pa;
	pv_entry_t pv;
	int64_t data;

	KDASSERT(pm != pmap_kernel() || eva < INTSTACK || sva > EINTSTACK);
	KDASSERT(pm != pmap_kernel() || eva < kdata || sva > ekdata);

	if ((prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
		return;

	if (prot == PROT_NONE) {
		pmap_remove(pm, sva, eva);
		return;
	}

	mtx_enter(&pm->pm_mtx);
	sva = sva & ~PGOFSET;
	while (sva < eva) {
		/*
		 * Is this part of the permanent 4MB mapping?
		 */
		if (pm == pmap_kernel() && sva >= ktext &&
		    sva < roundup(ekdata, 4*MEG)) {
			prom_printf("pmap_protect: va=%08x in locked TLB\r\n", sva);
			OF_enter();
			mtx_leave(&pm->pm_mtx);
			return;
		}

		if (((data = pseg_get(pm, sva)) & TLB_V) /*&& ((data&TLB_TSB_LOCK) == 0)*/) {
			pa = data & TLB_PA_MASK;
			pv = pa_to_pvh(pa);
			if (pv != NULL) {
				struct vm_page *pg = PHYS_TO_VM_PAGE(pa);

				/* Save REF/MOD info */
				mtx_enter(&pg->mdpage.pvmtx);
				pv->pv_va |= pmap_tte2flags(data);
				mtx_leave(&pg->mdpage.pvmtx);
			}
			/* Just do the pmap and TSB, not the pv_list */
			if (CPU_ISSUN4V) {
				if ((prot & PROT_WRITE) == 0)
					data &= ~(SUN4V_TLB_W|SUN4V_TLB_REAL_W);
				if ((prot & PROT_EXEC) == 0)
					data &= ~(SUN4V_TLB_EXEC);
			} else {
				if ((prot & PROT_WRITE) == 0)
					data &= ~(SUN4U_TLB_W|SUN4U_TLB_REAL_W);
				if ((prot & PROT_EXEC) == 0)
					data &= ~(SUN4U_TLB_EXEC | SUN4U_TLB_EXEC_ONLY);
			}
			KDASSERT((data & TLB_NFO) == 0);
			if (pseg_set(pm, sva, data, 0)) {
				printf("pmap_protect: gotten pseg empty!\n");
				db_enter();
				/* panic? */
			}

			if (pm->pm_ctx || pm == pmap_kernel()) {
				tsb_invalidate(pm->pm_ctx, sva);
				tlb_flush_pte(sva, pm->pm_ctx);
			}
		}
		sva += NBPG;
	}
	mtx_leave(&pm->pm_mtx);
}

/*
 * Extract the physical page address associated
 * with the given map/virtual_address pair.
 */
boolean_t
pmap_extract(struct pmap *pm, vaddr_t va, paddr_t *pap)
{
	paddr_t pa;

	if (pm == pmap_kernel()) {
		if (va >= kdata && va < roundup(ekdata, 4*MEG)) {
			/* Need to deal w/locked TLB entry specially. */
			pa = (paddr_t)(kdatap - kdata + va);
		} else if (va >= ktext && va < ektext) {
			/* Need to deal w/locked TLB entry specially. */
			pa = (paddr_t)(ktextp - ktext + va);
		} else if (va >= INTSTACK && va < EINTSTACK) {
			pa = curcpu()->ci_paddr + va - INTSTACK;
		} else {
			goto check_pseg;
		}
	} else {
check_pseg:
		mtx_enter(&pm->pm_mtx);
		pa = pseg_get(pm, va) & TLB_PA_MASK;
		mtx_leave(&pm->pm_mtx);
		if (pa == 0)
			return FALSE;
		pa |= va & PAGE_MASK;
	}
	if (pap != NULL)
		*pap = pa;
	return TRUE;
}
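
/*
 * Illustrative sketch only (not compiled; example_kva_to_pa is a
 * hypothetical helper, not part of this pmap): typical use of
 * pmap_extract() to find the physical address backing a kernel
 * virtual address.
 */
#ifdef NOTYET
paddr_t
example_kva_to_pa(vaddr_t va)
{
	paddr_t pa;

	if (pmap_extract(pmap_kernel(), va, &pa) == FALSE)
		return (0);	/* no mapping at va */
	return (pa);
}
#endif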

/*
 * Return the number of bytes that pmap_dumpmmu() will dump.
 */
int
pmap_dumpsize(void)
{
	int sz;

	sz = ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t));
	sz += memsize * sizeof(phys_ram_seg_t);

	return btodb(sz + DEV_BSIZE - 1);
}

/*
 * Write the mmu contents to the dump device.
 * This gets appended to the end of a crash dump since
 * there is no in-core copy of kernel memory mappings on a 4/4c machine.
 *
 * Write the core dump headers and MD data to the dump device.
 * We dump the following items:
 *
 *	kcore_seg_t		 (MI header defined in <sys/kcore.h>)
 *	cpu_kcore_hdr_t		 (MD header defined in <machine/kcore.h>)
 *	phys_ram_seg_t[memsize]  physical memory segments
 */
int
pmap_dumpmmu(int (*dump)(dev_t, daddr_t, caddr_t, size_t), daddr_t blkno)
{
	kcore_seg_t *kseg;
	cpu_kcore_hdr_t *kcpu;
	phys_ram_seg_t memseg;
	register int error = 0;
	register int i, memsegoffset;
	int buffer[dbtob(1) / sizeof(int)];
	int *bp, *ep;

#define EXPEDITE(p,n) do {						\
	int *sp = (int *)(p);						\
	int sz = (n);							\
	while (sz > 0) {						\
		*bp++ = *sp++;						\
		if (bp >= ep) {						\
			error = (*dump)(dumpdev, blkno,			\
			    (caddr_t)buffer, dbtob(1));			\
			if (error != 0)					\
				return (error);				\
			++blkno;					\
			bp = buffer;					\
		}							\
		sz -= 4;						\
	}								\
} while (0)

	/* Setup bookkeeping pointers */
	bp = buffer;
	ep = &buffer[sizeof(buffer) / sizeof(buffer[0])];

	/* Fill in MI segment header */
	kseg = (kcore_seg_t *)bp;
	CORE_SETMAGIC(*kseg, KCORE_MAGIC, MID_MACHINE, CORE_CPU);
	kseg->c_size = dbtob(pmap_dumpsize()) - ALIGN(sizeof(kcore_seg_t));

	/* Fill in MD segment header (interpreted by MD part of libkvm) */
	kcpu = (cpu_kcore_hdr_t *)((long)bp + ALIGN(sizeof(kcore_seg_t)));
	kcpu->cputype = CPU_SUN4U;
	kcpu->kernbase = (u_int64_t)KERNBASE;
	kcpu->cpubase = (u_int64_t)CPUINFO_VA;

	/* Describe the locked text segment */
	kcpu->ktextbase = (u_int64_t)ktext;
	kcpu->ktextp = (u_int64_t)ktextp;
	kcpu->ktextsz = (u_int64_t)(roundup(ektextp, 4*MEG) - ktextp);

	/* Describe locked data segment */
	kcpu->kdatabase = (u_int64_t)kdata;
	kcpu->kdatap = (u_int64_t)kdatap;
	kcpu->kdatasz = (u_int64_t)(roundup(ekdatap, 4*MEG) - kdatap);

	/* Now the memsegs */
	kcpu->nmemseg = memsize;
	kcpu->memsegoffset = memsegoffset = ALIGN(sizeof(cpu_kcore_hdr_t));

	/* Now we need to point this at our kernel pmap. */
	kcpu->nsegmap = STSZ;
	kcpu->segmapoffset = (u_int64_t)pmap_kernel()->pm_physaddr;

	/* Note: we have assumed everything fits in buffer[] so far... */
	bp = (int *)((long)kcpu + ALIGN(sizeof(cpu_kcore_hdr_t)));

	for (i = 0; i < memsize; i++) {
		memseg.start = mem[i].start;
		memseg.size = mem[i].size;
		EXPEDITE(&memseg, sizeof(phys_ram_seg_t));
	}

	if (bp != buffer)
		error = (*dump)(dumpdev, blkno++, (caddr_t)buffer, dbtob(1));

	return (error);
}

/*
 * Determine (non)existence of physical page
 */
int
pmap_pa_exists(paddr_t pa)
{
	struct mem_region *mp;

	/* Just go through physical memory list & see if we're there */
	for (mp = mem; mp->size && mp->start <= pa; mp++)
		if (mp->start <= pa && mp->start + mp->size >= pa)
			return 1;
	return 0;
}

/*
 * Lookup the appropriate TSB entry.
 *
 * Here is the full official pseudo code:
 *
 */

#ifdef NOTYET
int64 GenerateTSBPointer(
	int64 va,		/* Missing VA */
	PointerType type,	/* 8K_POINTER or 16K_POINTER */
	int64 TSBBase,		/* TSB Register[63:13] << 13 */
	Boolean split,		/* TSB Register[12] */
	int TSBsize)		/* TSB Register[2:0] */
{
	int64 vaPortion;
	int64 TSBBaseMask;
	int64 splitMask;

	/* TSBBaseMask marks the bits from TSB Base Reg */
	TSBBaseMask = 0xffffffffffffe000 <<
	    (split ? (TSBsize + 1) : TSBsize);

	/* Shift va towards lsb appropriately and */
	/* zero out the original va page offset */
	vaPortion = (va >> ((type == 8K_POINTER) ? 9 : 12)) &
	    0xfffffffffffffff0;

	if (split) {
		/* There's only one bit in question for split */
		splitMask = 1 << (13 + TSBsize);
		if (type == 8K_POINTER)
			/* Make sure we're in the lower half */
			vaPortion &= ~splitMask;
		else
			/* Make sure we're in the upper half */
			vaPortion |= splitMask;
	}
	return (TSBBase & TSBBaseMask) | (vaPortion & ~TSBBaseMask);
}
#endif

/*
 * Of course, since we are not using a split TSB or variable page sizes,
 * we can optimize this a bit.
 *
 * The following only works for a unified 8K TSB.  It will find the slot
 * for that particular va and return it.  IT MAY BE FOR ANOTHER MAPPING!
 */
int
ptelookup_va(vaddr_t va)
{
	long tsbptr;
#define TSBBASEMASK	(0xffffffffffffe000LL << tsbsize)

	tsbptr = (((va >> 9) & 0xfffffffffffffff0LL) & ~TSBBASEMASK);
	return (tsbptr / sizeof(pte_t));
}
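
/*
 * Sketch of the computation above, assuming the 13-bit (8K) page shift
 * used throughout this pmap and a 16-byte pte_t: ~TSBBASEMASK keeps the
 * low 13 + tsbsize bits, so the returned slot reduces to
 *
 *	(va >> 13) & (TSBENTS - 1)
 *
 * i.e. the virtual page number modulo the number of TSB entries.
 */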

/*
 * Do whatever is needed to sync the MOD/REF flags
 */

boolean_t
pmap_clear_modify(struct vm_page *pg)
{
	paddr_t pa = VM_PAGE_TO_PHYS(pg);
	int changed = 0;
	pv_entry_t pv;

	/* Clear all mappings */
	mtx_enter(&pg->mdpage.pvmtx);
	pv = pa_to_pvh(pa);
	if (pv->pv_va & PV_MOD) {
		changed |= 1;
		pv->pv_va &= ~PV_MOD;
	}
	if (pv->pv_pmap != NULL) {
		for (; pv; pv = pv->pv_next) {
			int64_t data;

			/* First clear the mod bit in the PTE and make it R/O */
			data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK);

			/* Need to both clear the modify and write bits */
			if (CPU_ISSUN4V) {
				if (data & (SUN4V_TLB_MODIFY))
					changed |= 1;
				data &= ~(SUN4V_TLB_MODIFY|SUN4V_TLB_W);
			} else {
				if (data & (SUN4U_TLB_MODIFY))
					changed |= 1;
				data &= ~(SUN4U_TLB_MODIFY|SUN4U_TLB_W);
			}
			KDASSERT((data & TLB_NFO) == 0);
			if (pseg_set(pv->pv_pmap, pv->pv_va & PV_VAMASK, data, 0)) {
				printf("pmap_clear_modify: gotten pseg empty!\n");
				db_enter();
				/* panic? */
			}
			if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
				tsb_invalidate(pv->pv_pmap->pm_ctx,
				    (pv->pv_va & PV_VAMASK));
				tlb_flush_pte((pv->pv_va & PV_VAMASK),
				    pv->pv_pmap->pm_ctx);
			}
			/* Then clear the mod bit in the pv */
			if (pv->pv_va & PV_MOD) {
				changed |= 1;
				pv->pv_va &= ~PV_MOD;
			}
			dcache_flush_page(pa);
		}
	}
	mtx_leave(&pg->mdpage.pvmtx);

	return (changed);
}

boolean_t
pmap_clear_reference(struct vm_page *pg)
{
	paddr_t pa = VM_PAGE_TO_PHYS(pg);
	int changed = 0;
	pv_entry_t pv;

	/* Clear all references */
	mtx_enter(&pg->mdpage.pvmtx);
	pv = pa_to_pvh(pa);
	if (pv->pv_va & PV_REF) {
		changed = 1;
		pv->pv_va &= ~PV_REF;
	}
	if (pv->pv_pmap != NULL) {
		for (; pv; pv = pv->pv_next) {
			int64_t data;

			data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK);
			if (CPU_ISSUN4V) {
				if (data & SUN4V_TLB_ACCESS)
					changed = 1;
				data &= ~SUN4V_TLB_ACCESS;
			} else {
				if (data & SUN4U_TLB_ACCESS)
					changed = 1;
				data &= ~SUN4U_TLB_ACCESS;
			}
			KDASSERT((data & TLB_NFO) == 0);
			if (pseg_set(pv->pv_pmap, pv->pv_va & PV_VAMASK, data, 0)) {
				printf("pmap_clear_reference: gotten pseg empty!\n");
				db_enter();
				/* panic? */
			}
			if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
				tsb_invalidate(pv->pv_pmap->pm_ctx,
				    (pv->pv_va & PV_VAMASK));
				/*
				tlb_flush_pte(pv->pv_va & PV_VAMASK,
				    pv->pv_pmap->pm_ctx);
				*/
			}
			if (pv->pv_va & PV_REF) {
				changed = 1;
				pv->pv_va &= ~PV_REF;
			}
		}
	}
	/* Stupid here will take a cache hit even on unmapped pages 8^( */
	dcache_flush_page(VM_PAGE_TO_PHYS(pg));
	mtx_leave(&pg->mdpage.pvmtx);

	return (changed);
}

boolean_t
pmap_is_modified(struct vm_page *pg)
{
	pv_entry_t pv, npv;
	int mod = 0;

	/* Check if any mapping has been modified */
	mtx_enter(&pg->mdpage.pvmtx);
	pv = &pg->mdpage.pvent;
	if (pv->pv_va & PV_MOD)
		mod = 1;
	if (!mod && (pv->pv_pmap != NULL)) {
		for (npv = pv; mod == 0 && npv && npv->pv_pmap; npv = npv->pv_next) {
			int64_t data;

			data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK);
			if (pmap_tte2flags(data) & PV_MOD)
				mod = 1;
			/* Migrate modify info to head pv */
			if (npv->pv_va & PV_MOD) {
				mod = 1;
				npv->pv_va &= ~PV_MOD;
			}
		}
	}
	/* Save modify info */
	if (mod)
		pv->pv_va |= PV_MOD;
	mtx_leave(&pg->mdpage.pvmtx);

	return (mod);
}

boolean_t
pmap_is_referenced(struct vm_page *pg)
{
	pv_entry_t pv, npv;
	int ref = 0;

	/* Check if any mapping has been referenced */
	mtx_enter(&pg->mdpage.pvmtx);
	pv = &pg->mdpage.pvent;
	if (pv->pv_va & PV_REF)
		ref = 1;
	if (!ref && (pv->pv_pmap != NULL)) {
		for (npv = pv; npv; npv = npv->pv_next) {
			int64_t data;

			data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK);
			if (pmap_tte2flags(data) & PV_REF)
				ref = 1;
			/* Migrate ref info to head pv */
			if (npv->pv_va & PV_REF) {
				ref = 1;
				npv->pv_va &= ~PV_REF;
			}
		}
	}
	/* Save ref info */
	if (ref)
		pv->pv_va |= PV_REF;
	mtx_leave(&pg->mdpage.pvmtx);

	return (ref);
}
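
/*
 * Note on the two functions above (explanatory only): while walking the
 * pv list they pull any PV_MOD/PV_REF bits found on continuation entries
 * up into the head pv, so a later query whose head entry already carries
 * the bit can skip the page-table walk entirely.
 */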

/*
 * Routine:	pmap_unwire
 * Function:	Clear the wired attribute for a map/virtual-address
 *		pair.
 * In/out conditions:
 *	The mapping must already exist in the pmap.
 */
void
pmap_unwire(struct pmap *pmap, vaddr_t va)
{
	int64_t data;

	if (pmap == NULL)
		return;

	/*
	 * Is this part of the permanent 4MB mapping?
	 */
	if (pmap == pmap_kernel() && va >= ktext &&
	    va < roundup(ekdata, 4*MEG)) {
		prom_printf("pmap_unwire: va=%08x in locked TLB\r\n", va);
		OF_enter();
		return;
	}
	mtx_enter(&pmap->pm_mtx);
	data = pseg_get(pmap, va & PV_VAMASK);

	if (CPU_ISSUN4V)
		data &= ~SUN4V_TLB_TSB_LOCK;
	else
		data &= ~SUN4U_TLB_TSB_LOCK;

	if (pseg_set(pmap, va & PV_VAMASK, data, 0)) {
		printf("pmap_unwire: gotten pseg empty!\n");
		db_enter();
		/* panic? */
	}
	mtx_leave(&pmap->pm_mtx);
}
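
/*
 * Note on pmap_unwire() above (explanatory only): wiring is recorded in
 * the TTE itself.  pmap_enter() sets SUN4U_TLB_TSB_LOCK/SUN4V_TLB_TSB_LOCK
 * for PMAP_WIRED mappings, and pmap_unwire() simply clears that bit again.
 */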

/*
 * Lower the protection on the specified physical page.
 *
 * Never enable writing as it will break COW
 */
void
pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
{
	paddr_t pa = VM_PAGE_TO_PHYS(pg);
	pv_entry_t pv;
	int64_t data, clear, set;

	if (prot & PROT_WRITE)
		return;

	if (prot & (PROT_READ | PROT_EXEC)) {
		/* copy_on_write */

		set = TLB_V;
		if (CPU_ISSUN4V) {
			clear = SUN4V_TLB_REAL_W|SUN4V_TLB_W;
			if (PROT_EXEC & prot)
				set |= SUN4V_TLB_EXEC;
			else
				clear |= SUN4V_TLB_EXEC;
		} else {
			clear = SUN4U_TLB_REAL_W|SUN4U_TLB_W;
			if (PROT_EXEC & prot)
				set |= SUN4U_TLB_EXEC;
			else
				clear |= SUN4U_TLB_EXEC;
			if (PROT_EXEC == prot)
				set |= SUN4U_TLB_EXEC_ONLY;
			else
				clear |= SUN4U_TLB_EXEC_ONLY;
		}

		pv = pa_to_pvh(pa);
		mtx_enter(&pg->mdpage.pvmtx);
		if (pv->pv_pmap != NULL) {
			for (; pv; pv = pv->pv_next) {
				data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK);

				/* Save REF/MOD info */
				pv->pv_va |= pmap_tte2flags(data);

				data &= ~(clear);
				data |= (set);
				KDASSERT((data & TLB_NFO) == 0);
				if (pseg_set(pv->pv_pmap, pv->pv_va & PV_VAMASK, data, 0)) {
					printf("pmap_page_protect: gotten pseg empty!\n");
					db_enter();
					/* panic? */
				}
				if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
					tsb_invalidate(pv->pv_pmap->pm_ctx,
					    (pv->pv_va & PV_VAMASK));
					tlb_flush_pte(pv->pv_va & PV_VAMASK,
					    pv->pv_pmap->pm_ctx);
				}
			}
		}
		mtx_leave(&pg->mdpage.pvmtx);
	} else {
		pv_entry_t firstpv;
		/* remove mappings */

		firstpv = pa_to_pvh(pa);
		mtx_enter(&pg->mdpage.pvmtx);

		/* First remove the entire list of continuation pv's */
		while ((pv = firstpv->pv_next) != NULL) {
			data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK);

			/* Save REF/MOD info */
			firstpv->pv_va |= pmap_tte2flags(data);

			/* Clear mapping */
			if (pseg_set(pv->pv_pmap, pv->pv_va & PV_VAMASK, 0, 0)) {
				printf("pmap_page_protect: gotten pseg empty!\n");
				db_enter();
				/* panic? */
			}
			if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
				tsb_invalidate(pv->pv_pmap->pm_ctx,
				    (pv->pv_va & PV_VAMASK));
				tlb_flush_pte(pv->pv_va & PV_VAMASK,
				    pv->pv_pmap->pm_ctx);
			}
			atomic_dec_long(&pv->pv_pmap->pm_stats.resident_count);

			/* free the pv */
			firstpv->pv_next = pv->pv_next;
			mtx_leave(&pg->mdpage.pvmtx);
			pool_put(&pv_pool, pv);
			mtx_enter(&pg->mdpage.pvmtx);
		}

		pv = firstpv;

		/* Then remove the primary pv */
		if (pv->pv_pmap != NULL) {
			data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK);

			/* Save REF/MOD info */
			pv->pv_va |= pmap_tte2flags(data);
			if (pseg_set(pv->pv_pmap, pv->pv_va & PV_VAMASK, 0, 0)) {
				printf("pmap_page_protect: gotten pseg empty!\n");
				db_enter();
				/* panic? */
			}
			if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
				tsb_invalidate(pv->pv_pmap->pm_ctx,
				    (pv->pv_va & PV_VAMASK));
				tlb_flush_pte(pv->pv_va & PV_VAMASK,
				    pv->pv_pmap->pm_ctx);
			}
			atomic_dec_long(&pv->pv_pmap->pm_stats.resident_count);

			KASSERT(pv->pv_next == NULL);
			/* dump the first pv */
			pv->pv_pmap = NULL;
		}
		dcache_flush_page(pa);
		mtx_leave(&pg->mdpage.pvmtx);
	}
	/* We should really only flush the pages we demapped. */
}

/*
 * Allocate a context.  If necessary, steal one from someone else.
 * Changes hardware context number and loads segment map.
 *
 * This routine is only ever called from locore.s just after it has
 * saved away the previous process, so there are no active user windows.
 *
 * The new context is flushed from the TLB before returning.
 */
int
ctx_alloc(struct pmap *pm)
{
	int s, cnum;
	static int next = 0;

	if (pm == pmap_kernel()) {
#ifdef DIAGNOSTIC
		printf("ctx_alloc: kernel pmap!\n");
#endif
		return (0);
	}
	s = splvm();
	cnum = next;
	do {
		/*
		 * We use the last context as an "invalid" context in
		 * TSB tags.  Never allocate (or bad things will happen).
		 */
		if (cnum >= numctx - 2)
			cnum = 0;
	} while (ctxbusy[++cnum] != 0 && cnum != next);
	if (cnum == 0)
		cnum++;		/* Never steal ctx 0 */
	if (ctxbusy[cnum]) {
		int i;
		/* We gotta steal this context */
		for (i = 0; i < TSBENTS; i++) {
			if (TSB_TAG_CTX(tsb_dmmu[i].tag) == cnum)
				tsb_dmmu[i].tag = TSB_TAG_INVALID;
			if (TSB_TAG_CTX(tsb_immu[i].tag) == cnum)
				tsb_immu[i].tag = TSB_TAG_INVALID;
		}
		tlb_flush_ctx(cnum);
	}
	ctxbusy[cnum] = pm->pm_physaddr;
	next = cnum;
	splx(s);
	pm->pm_ctx = cnum;
	return cnum;
}
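
/*
 * Note on ctx_alloc() above (explanatory only): when every context is
 * busy the chosen one is stolen; all TSB entries still tagged with it
 * are reset to TSB_TAG_INVALID and the context is flushed from the TLB
 * before it is handed to the new pmap.
 */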

/*
 * Give away a context.
 */
void
ctx_free(struct pmap *pm)
{
	int oldctx;

	oldctx = pm->pm_ctx;

	if (oldctx == 0)
		panic("ctx_free: freeing kernel context");
#ifdef DIAGNOSTIC
	if (ctxbusy[oldctx] == 0)
		printf("ctx_free: freeing free context %d\n", oldctx);
	if (ctxbusy[oldctx] != pm->pm_physaddr) {
		printf("ctx_free: freeing someone else's context\n"
		    "ctxbusy[%d] = %p, pm(%p)->pm_ctx = %p\n",
		    oldctx, (void *)(u_long)ctxbusy[oldctx], pm,
		    (void *)(u_long)pm->pm_physaddr);
		db_enter();
	}
#endif
	/* We should verify it has not been stolen and reallocated... */
	ctxbusy[oldctx] = 0;
}

/*
 * Enter the pmap and virtual address into the
 * physical to virtual map table.
 */
pv_entry_t
pmap_enter_pv(struct pmap *pmap, pv_entry_t npv, vaddr_t va, paddr_t pa)
{
	struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
	pv_entry_t pv = &pg->mdpage.pvent;

	mtx_enter(&pg->mdpage.pvmtx);

	if (pv->pv_pmap == NULL) {
		/*
		 * No entries yet, use header as the first entry
		 */
		PV_SETVA(pv, va);
		pv->pv_pmap = pmap;
		pv->pv_next = NULL;

		mtx_leave(&pg->mdpage.pvmtx);
		return (npv);
	}

	if (npv == NULL)
		panic("%s: no pv entries available", __func__);

	if (!(pv->pv_va & PV_ALIAS)) {
		/*
		 * There is at least one other VA mapping this page.
		 * Check if they are cache index compatible.  If not,
		 * remove all mappings, flush the cache and set the page
		 * to be mapped uncached.  Caching will be restored
		 * when pages are mapped compatible again.
		 */
		if ((pv->pv_va ^ va) & VA_ALIAS_MASK) {
			pv->pv_va |= PV_ALIAS;
			pmap_page_cache(pmap, pa, 0);
		}
	}

	/*
	 * There is at least one other VA mapping this page.
	 * Place this entry after the header.
	 */
	npv->pv_va = va & PV_VAMASK;
	npv->pv_pmap = pmap;
	npv->pv_next = pv->pv_next;
	pv->pv_next = npv;

	mtx_leave(&pg->mdpage.pvmtx);
	return (NULL);
}
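
/*
 * Note on pmap_enter_pv() above (explanatory only): when a second mapping
 * of a page is not cache-index compatible with the first (the
 * VA_ALIAS_MASK bits differ), the page is flagged PV_ALIAS and mapped
 * uncached via pmap_page_cache().  pmap_remove_pv() below rechecks the
 * list and restores caching once the conflicting mapping goes away.
 */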

/*
 * Remove a physical to virtual address translation.
 */
pv_entry_t
pmap_remove_pv(struct pmap *pmap, vaddr_t va, paddr_t pa)
{
	pv_entry_t pv, opv, npv = NULL;
	struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
	int64_t data = 0LL;
	int alias;

	opv = pv = &pg->mdpage.pvent;
	mtx_enter(&pg->mdpage.pvmtx);

	/*
	 * If it is the first entry on the list, it is actually
	 * in the header and we must copy the following entry up
	 * to the header.  Otherwise we must search the list for
	 * the entry.  In either case we free the now unused entry.
	 */
	if (pmap == pv->pv_pmap && PV_MATCH(pv, va)) {
		/* Save modified/ref bits */
		data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK);
		npv = pv->pv_next;
		if (npv) {
			/* First save mod/ref bits */
			pv->pv_va = (pv->pv_va & PV_MASK) | npv->pv_va;
			pv->pv_next = npv->pv_next;
			pv->pv_pmap = npv->pv_pmap;
		} else {
			pv->pv_pmap = NULL;
			pv->pv_next = NULL;
			pv->pv_va &= (PV_REF|PV_MOD); /* Only save ref/mod bits */
		}
	} else {
		for (npv = pv->pv_next; npv; pv = npv, npv = npv->pv_next) {
			if (pmap == npv->pv_pmap && PV_MATCH(npv, va))
				goto found;
		}

		/*
		 * Sometimes UVM gets confused and calls pmap_remove() instead
		 * of pmap_kremove()
		 */
		mtx_leave(&pg->mdpage.pvmtx);
		return (NULL);
found:
		pv->pv_next = npv->pv_next;

		/*
		 * move any referenced/modified info to the base pv
		 */
		data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK);

		/*
		 * Here, if this page was aliased, we should try to clear out
		 * any alias that may have occurred.  However, that's a
		 * complicated operation involving multiple scans of the pv
		 * list.
		 */
	}

	/* Save REF/MOD info */
	opv->pv_va |= pmap_tte2flags(data);

	/* Check to see if the alias went away */
	if (opv->pv_va & PV_ALIAS) {
		alias = 0;
		for (pv = opv; pv; pv = pv->pv_next) {
			if ((pv->pv_va ^ opv->pv_va) & VA_ALIAS_MASK) {
				alias = 1;
				break;
			}
		}
		if (alias == 0) {
			opv->pv_va &= ~PV_ALIAS;
			pmap_page_cache(pmap, pa, 1);
		}
	}

	mtx_leave(&pg->mdpage.pvmtx);
	return (npv);
}

/*
 * pmap_page_cache:
 *
 * Change all mappings of a page to cached/uncached.
 */
void
pmap_page_cache(struct pmap *pm, paddr_t pa, int mode)
{
	pv_entry_t pv;
	struct vm_page *pg = PHYS_TO_VM_PAGE(pa);

	if (CPU_ISSUN4US || CPU_ISSUN4V)
		return;

	pv = &pg->mdpage.pvent;
	if (pv == NULL)
		return;

	MUTEX_ASSERT_LOCKED(&pg->mdpage.pvmtx);

	while (pv) {
		vaddr_t va;

		va = (pv->pv_va & PV_VAMASK);
		if (mode) {
			/* Enable caching */
			if (pseg_set(pv->pv_pmap, va,
			    pseg_get(pv->pv_pmap, va) | SUN4U_TLB_CV, 0)) {
				printf("pmap_page_cache: aliased pseg empty!\n");
				db_enter();
				/* panic? */
			}
		} else {
			/* Disable caching */
			if (pseg_set(pv->pv_pmap, va,
			    pseg_get(pv->pv_pmap, va) & ~SUN4U_TLB_CV, 0)) {
				printf("pmap_page_cache: aliased pseg empty!\n");
				db_enter();
				/* panic? */
			}
		}
		if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
			tsb_invalidate(pv->pv_pmap->pm_ctx, va);
			/* Force reload -- protections may be changed */
			tlb_flush_pte(va, pv->pv_pmap->pm_ctx);
		}

		pv = pv->pv_next;
	}
}

int
pmap_get_page(paddr_t *pa, const char *wait, struct pmap *pm)
{
	int reserve = pm == pmap_kernel() ? UVM_PGA_USERESERVE : 0;

	if (uvm.page_init_done) {
		struct vm_page *pg;

		while ((pg = uvm_pagealloc(NULL, 0, NULL,
		    UVM_PGA_ZERO|reserve)) == NULL) {
			if (wait == NULL)
				return 0;
			uvm_wait(wait);
		}
		pg->wire_count++;
		atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
		*pa = VM_PAGE_TO_PHYS(pg);
	} else {
		uvm_page_physget(pa);
		prom_claim_phys(*pa, PAGE_SIZE);
		pmap_zero_phys(*pa);
	}

	return (1);
}

void
pmap_free_page(paddr_t pa, struct pmap *pm)
{
	struct vm_page *pg = PHYS_TO_VM_PAGE(pa);

	pg->wire_count = 0;
	uvm_pagefree(pg);
}

void
pmap_remove_holes(struct vmspace *vm)
{
	vaddr_t shole, ehole;
	struct vm_map *map = &vm->vm_map;

	/*
	 * Although the hardware only supports 44-bit virtual addresses
	 * (and thus a hole from 1 << 43 to -1 << 43), this pmap
	 * implementation itself only supports 43-bit virtual addresses,
	 * so we have to narrow the hole a bit more.
	 */
	shole = 1L << (HOLESHIFT - 1);
	ehole = -1L << (HOLESHIFT - 1);

	shole = ulmax(vm_map_min(map), shole);
	ehole = ulmin(vm_map_max(map), ehole);

	if (ehole <= shole)
		return;

	(void)uvm_map(map, &shole, ehole - shole, NULL, UVM_UNKNOWN_OFFSET, 0,
	    UVM_MAPFLAG(PROT_NONE, PROT_NONE, MAP_INHERIT_SHARE, MADV_RANDOM,
	    UVM_FLAG_NOMERGE | UVM_FLAG_HOLE | UVM_FLAG_FIXED));
}
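
/*
 * Sketch of the resulting hole (explanatory only, assuming HOLESHIFT is
 * 43 to match the 43-bit limit described above): shole becomes 1L << 42
 * and ehole -1L << 42, so after clamping to the map bounds user maps may
 * only use [0, 2^42) and [-2^42, 0); everything in between is reserved
 * by the UVM_FLAG_HOLE entry.
 */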

#ifdef DDB

void
db_dump_pv(db_expr_t addr, int have_addr, db_expr_t count, char *modif)
{
	struct pv_entry *pv;

	if (!have_addr) {
		db_printf("Need addr for pv\n");
		return;
	}

	for (pv = pa_to_pvh(addr); pv; pv = pv->pv_next)
		db_printf("pv@%p: next=%p pmap=%p va=0x%llx\n",
		    pv, pv->pv_next, pv->pv_pmap,
		    (unsigned long long)pv->pv_va);
}

#endif

/*
 * Read an instruction from a given virtual memory address.
 * EXEC_ONLY mappings are bypassed.
 */
int
pmap_copyinsn(pmap_t pmap, vaddr_t va, uint32_t *insn)
{
	paddr_t pa;

	if (pmap == pmap_kernel())
		return EINVAL;

	mtx_enter(&pmap->pm_mtx);
	/* inline pmap_extract */
	pa = pseg_get(pmap, va) & TLB_PA_MASK;
	if (pa != 0)
		*insn = lduwa(pa | (va & PAGE_MASK), ASI_PHYS_CACHED);
	mtx_leave(&pmap->pm_mtx);

	return pa == 0 ? EFAULT : 0;
}