1 /* $OpenBSD: pmap.c,v 1.91 2015/04/10 18:08:31 kettenis Exp $ */ 2 /* $NetBSD: pmap.c,v 1.107 2001/08/31 16:47:41 eeh Exp $ */ 3 #undef NO_VCACHE /* Don't forget the locked TLB in dostart */ 4 /* 5 * 6 * Copyright (C) 1996-1999 Eduardo Horvath. 7 * All rights reserved. 8 * 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 */ 29 30 #include <sys/atomic.h> 31 #include <sys/param.h> 32 #include <sys/malloc.h> 33 #include <sys/queue.h> 34 #include <sys/systm.h> 35 #include <sys/proc.h> 36 #include <sys/msgbuf.h> 37 #include <sys/pool.h> 38 #include <sys/exec.h> 39 #include <sys/core.h> 40 #include <sys/kcore.h> 41 42 #include <uvm/uvm.h> 43 44 #include <machine/pcb.h> 45 #include <machine/sparc64.h> 46 #include <machine/ctlreg.h> 47 #include <machine/hypervisor.h> 48 #include <machine/openfirm.h> 49 #include <machine/kcore.h> 50 51 #include "cache.h" 52 53 #ifdef DDB 54 #include <machine/db_machdep.h> 55 #include <ddb/db_command.h> 56 #include <ddb/db_sym.h> 57 #include <ddb/db_variables.h> 58 #include <ddb/db_extern.h> 59 #include <ddb/db_access.h> 60 #include <ddb/db_output.h> 61 #define Debugger() __asm volatile("ta 1; nop"); 62 #else 63 #define Debugger() 64 #define db_printf printf 65 #endif 66 67 #define MEG (1<<20) /* 1MB */ 68 #define KB (1<<10) /* 1KB */ 69 70 paddr_t cpu0paddr;/* XXXXXXXXXXXXXXXX */ 71 72 extern int64_t asmptechk(int64_t *pseg[], int addr); /* DEBUG XXXXX */ 73 74 /* These routines are in assembly to allow access thru physical mappings */ 75 extern int64_t pseg_get(struct pmap*, vaddr_t addr); 76 extern int pseg_set(struct pmap*, vaddr_t addr, int64_t tte, paddr_t spare); 77 78 /* XXX - temporary workaround for pmap_{copy,zero}_page api change */ 79 void pmap_zero_phys(paddr_t pa); 80 void pmap_copy_phys(paddr_t src, paddr_t dst); 81 82 /* 83 * Diatribe on ref/mod counting: 84 * 85 * First of all, ref/mod info must be non-volatile. Hence we need to keep it 86 * in the pv_entry structure for each page. (We could bypass this for the 87 * vm_page, but that's a long story....) 88 * 89 * This architecture has nice, fast traps with lots of space for software bits 90 * in the TTE. To accelerate ref/mod counts we make use of these features. 91 * 92 * When we map a page initially, we place a TTE in the page table. It's 93 * inserted with the TLB_W and TLB_ACCESS bits cleared. If a page is really 94 * writeable we set the TLB_REAL_W bit for the trap handler. 
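 *
 * As an illustrative sketch (using the SUN4U bit names that appear later
 * in this file; the real manipulation is done by the miss handlers in
 * locore.s, and "writable" is just a stand-in for the caller's protection
 * check), the initial insertion amounts to:
 *
 *	data &= ~(SUN4U_TLB_W | SUN4U_TLB_ACCESS);
 *	if (writable)
 *		data |= SUN4U_TLB_REAL_W;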
95 * 96 * Whenever we take a TLB miss trap, the trap handler will set the TLB_ACCESS 97 * bit in the approprate TTE in the page table. Whenever we take a protection 98 * fault, if the TLB_REAL_W bit is set then we flip both the TLB_W and TLB_MOD 99 * bits to enable writing and mark the page as modified. 100 * 101 * This means that we may have ref/mod information all over the place. The 102 * pmap routines must traverse the page tables of all pmaps with a given page 103 * and collect/clear all the ref/mod information and copy it into the pv_entry. 104 */ 105 106 #ifdef NO_VCACHE 107 #define FORCE_ALIAS 1 108 #else 109 #define FORCE_ALIAS 0 110 #endif 111 112 #define PV_ALIAS 0x1LL 113 #define PV_REF 0x2LL 114 #define PV_MOD 0x4LL 115 #define PV_NVC 0x8LL 116 #define PV_NC 0x10LL 117 #define PV_WE 0x20LL /* Debug -- track if this page was ever writable */ 118 #define PV_MASK (0x03fLL) 119 #define PV_VAMASK (~(NBPG - 1)) 120 #define PV_MATCH(pv,va) (!((((pv)->pv_va) ^ (va)) & PV_VAMASK)) 121 #define PV_SETVA(pv,va) ((pv)->pv_va = (((va) & PV_VAMASK) | (((pv)->pv_va) & PV_MASK))) 122 123 pv_entry_t pv_table; /* array of entries, one per page */ 124 static struct pool pv_pool; 125 static struct pool pmap_pool; 126 127 void pmap_remove_pv(struct pmap *pm, vaddr_t va, paddr_t pa); 128 void pmap_enter_pv(struct pmap *pm, vaddr_t va, paddr_t pa); 129 void pmap_page_cache(struct pmap *pm, paddr_t pa, int mode); 130 131 void pmap_bootstrap_cpu(paddr_t); 132 133 void pmap_pinit(struct pmap *); 134 void pmap_release(struct pmap *); 135 pv_entry_t pa_to_pvh(paddr_t); 136 137 u_int64_t first_phys_addr; 138 139 pv_entry_t 140 pa_to_pvh(paddr_t pa) 141 { 142 struct vm_page *pg; 143 144 pg = PHYS_TO_VM_PAGE(pa); 145 return pg ? &pg->mdpage.pvent : NULL; 146 } 147 148 static __inline u_int 149 pmap_tte2flags(u_int64_t tte) 150 { 151 if (CPU_ISSUN4V) 152 return (((tte & SUN4V_TLB_ACCESS) ? PV_REF : 0) | 153 ((tte & SUN4V_TLB_MODIFY) ? PV_MOD : 0)); 154 else 155 return (((tte & SUN4U_TLB_ACCESS) ? PV_REF : 0) | 156 ((tte & SUN4U_TLB_MODIFY) ? PV_MOD : 0)); 157 } 158 159 /* 160 * Here's the CPU TSB stuff. It's allocated in pmap_bootstrap. 161 */ 162 pte_t *tsb_dmmu; 163 pte_t *tsb_immu; 164 int tsbsize; /* tsbents = 512 * 2^tsbsize */ 165 #define TSBENTS (512 << tsbsize) 166 #define TSBSIZE (TSBENTS * 16) 167 168 /* 169 * The invalid tsb tag uses the fact that the last context we have is 170 * never allocated. 171 */ 172 #define TSB_TAG_INVALID (~0LL << 48) 173 174 #define TSB_DATA(g,sz,pa,priv,write,cache,aliased,valid,ie) \ 175 (CPU_ISSUN4V ?\ 176 SUN4V_TSB_DATA(g,sz,pa,priv,write,cache,aliased,valid,ie) : \ 177 SUN4U_TSB_DATA(g,sz,pa,priv,write,cache,aliased,valid,ie)) 178 179 /* The same for sun4u and sun4v. */ 180 #define TLB_V SUN4U_TLB_V 181 182 /* Only used for DEBUG. */ 183 #define TLB_NFO (CPU_ISSUN4V ? SUN4V_TLB_NFO : SUN4U_TLB_NFO) 184 185 /* 186 * UltraSPARC T1 & T2 implement only a 40-bit real address range, just 187 * like older UltraSPARC CPUs. 188 */ 189 #define TLB_PA_MASK SUN4U_TLB_PA_MASK 190 191 /* XXX */ 192 #define TLB_TSB_LOCK (CPU_ISSUN4V ? SUN4V_TLB_TSB_LOCK : SUN4U_TLB_TSB_LOCK) 193 194 #ifdef SUN4V 195 struct tsb_desc *tsb_desc; 196 #endif 197 198 struct pmap kernel_pmap_; 199 200 extern int physmem; 201 /* 202 * Virtual and physical addresses of the start and end of kernel text 203 * and data segments. 
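 *
 * These delimit the ranges covered by the locked 4MB TLB entries that
 * sun4u_bootstrap_cpu()/sun4v_bootstrap_cpu() install below, and several
 * routines treat addresses inside them specially; pmap_extract(), for
 * instance, translates them by plain offset arithmetic,
 * pa = kdatap + (va - kdata), instead of consulting the page tables.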
204 */ 205 vaddr_t ktext; 206 paddr_t ktextp; 207 vaddr_t ektext; 208 paddr_t ektextp; 209 vaddr_t kdata; 210 paddr_t kdatap; 211 vaddr_t ekdata; 212 paddr_t ekdatap; 213 214 static int npgs; 215 static struct mem_region memlist[8]; /* Pick a random size here */ 216 217 vaddr_t vmmap; /* one reserved MI vpage for /dev/mem */ 218 219 struct mem_region *mem, *avail, *orig; 220 int memsize; 221 222 static int memh = 0, vmemh = 0; /* Handles to OBP devices */ 223 224 static int ptelookup_va(vaddr_t va); /* sun4u */ 225 226 static __inline void 227 tsb_invalidate(int ctx, vaddr_t va) 228 { 229 int i; 230 int64_t tag; 231 232 i = ptelookup_va(va); 233 tag = TSB_TAG(0, ctx, va); 234 if (tsb_dmmu[i].tag == tag) 235 atomic_cas_ulong((volatile unsigned long *)&tsb_dmmu[i].tag, 236 tag, TSB_TAG_INVALID); 237 if (tsb_immu[i].tag == tag) 238 atomic_cas_ulong((volatile unsigned long *)&tsb_immu[i].tag, 239 tag, TSB_TAG_INVALID); 240 } 241 242 struct prom_map *prom_map; 243 int prom_map_size; 244 245 #ifdef DEBUG 246 #define PDB_BOOT 0x20000 247 #define PDB_BOOT1 0x40000 248 int pmapdebug = 0; 249 250 #define BDPRINTF(n, f) if (pmapdebug & (n)) prom_printf f 251 #else 252 #define BDPRINTF(n, f) 253 #endif 254 255 /* 256 * 257 * A context is simply a small number that differentiates multiple mappings 258 * of the same address. Contexts on the spitfire are 13 bits, but could 259 * be as large as 17 bits. 260 * 261 * Each context is either free or attached to a pmap. 262 * 263 * The context table is an array of pointers to psegs. Just dereference 264 * the right pointer and you get to the pmap segment tables. These are 265 * physical addresses, of course. 266 * 267 */ 268 paddr_t *ctxbusy; 269 int numctx; 270 #define CTXENTRY (sizeof(paddr_t)) 271 #define CTXSIZE (numctx * CTXENTRY) 272 273 int pmap_get_page(paddr_t *, const char *, struct pmap *); 274 void pmap_free_page(paddr_t, struct pmap *); 275 276 /* 277 * Support for big page sizes. This maps the page size to the 278 * page bits. That is: these are the bits between 8K pages and 279 * larger page sizes that cause aliasing. 280 */ 281 struct page_size_map page_size_map[] = { 282 { (4*1024*1024-1) & ~(8*1024-1), PGSZ_4M }, 283 { (512*1024-1) & ~(8*1024-1), PGSZ_512K }, 284 { (64*1024-1) & ~(8*1024-1), PGSZ_64K }, 285 { (8*1024-1) & ~(8*1024-1), PGSZ_8K }, 286 { 0, PGSZ_8K&0 } 287 }; 288 289 /* 290 * Enter a TTE into the kernel pmap only. Don't do anything else. 291 * 292 * Use only during bootstrapping since it does no locking and 293 * can lose ref/mod info!!!! 294 * 295 */ 296 static void 297 pmap_enter_kpage(vaddr_t va, int64_t data) 298 { 299 paddr_t newp; 300 301 newp = 0; 302 while (pseg_set(pmap_kernel(), va, data, newp) == 1) { 303 newp = 0; 304 if (!pmap_get_page(&newp, NULL, pmap_kernel())) { 305 prom_printf("pmap_enter_kpage: out of pages\n"); 306 panic("pmap_enter_kpage"); 307 } 308 pmap_kernel()->pm_stats.resident_count++; 309 310 BDPRINTF(PDB_BOOT1, 311 ("pseg_set: pm=%p va=%p data=%lx newp %lx\r\n", 312 pmap_kernel(), va, (long)data, (long)newp)); 313 } 314 } 315 316 /* 317 * Check bootargs to see if we need to enable bootdebug. 
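 *
 * For example, a bootargs string containing ``-D'' turns on PDB_BOOT1,
 * and ``-V'' turns on both PDB_BOOT and PDB_BOOT1 (see the switch below).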
318 */ 319 #ifdef DEBUG 320 void 321 pmap_bootdebug(void) 322 { 323 int chosen; 324 char *cp; 325 char buf[128]; 326 327 /* 328 * Grab boot args from PROM 329 */ 330 chosen = OF_finddevice("/chosen"); 331 /* Setup pointer to boot flags */ 332 OF_getprop(chosen, "bootargs", buf, sizeof(buf)); 333 cp = buf; 334 while (*cp != '-') 335 if (*cp++ == '\0') 336 return; 337 for (;;) 338 switch (*++cp) { 339 case '\0': 340 return; 341 case 'V': 342 pmapdebug |= PDB_BOOT|PDB_BOOT1; 343 break; 344 case 'D': 345 pmapdebug |= PDB_BOOT1; 346 break; 347 } 348 } 349 #endif 350 351 /* 352 * This is called during bootstrap, before the system is really initialized. 353 * 354 * It's called with the start and end virtual addresses of the kernel. We 355 * bootstrap the pmap allocator now. We will allocate the basic structures we 356 * need to bootstrap the VM system here: the page frame tables, the TSB, and 357 * the free memory lists. 358 * 359 * Now all this is becoming a bit obsolete. maxctx is still important, but by 360 * separating the kernel text and data segments we really would need to 361 * provide the start and end of each segment. But we can't. The rodata 362 * segment is attached to the end of the kernel segment and has nothing to 363 * delimit its end. We could still pass in the beginning of the kernel and 364 * the beginning and end of the data segment but we could also just as easily 365 * calculate that all in here. 366 * 367 * To handle the kernel text, we need to do a reverse mapping of the start of 368 * the kernel, then traverse the free memory lists to find out how big it is. 369 */ 370 371 void 372 pmap_bootstrap(u_long kernelstart, u_long kernelend, u_int maxctx, u_int numcpus) 373 { 374 extern int data_start[], end[]; /* start of data segment */ 375 extern int msgbufmapped; 376 struct mem_region *mp, *mp1; 377 int msgbufsiz; 378 int pcnt; 379 size_t s, sz; 380 int i, j; 381 int64_t data; 382 vaddr_t va; 383 u_int64_t phys_msgbuf; 384 paddr_t newkp; 385 vaddr_t newkv, firstaddr, intstk; 386 vsize_t kdsize, ktsize; 387 388 #ifdef DEBUG 389 pmap_bootdebug(); 390 #endif 391 392 BDPRINTF(PDB_BOOT, ("Entered pmap_bootstrap.\r\n")); 393 /* 394 * set machine page size 395 */ 396 uvmexp.pagesize = NBPG; 397 uvm_setpagesize(); 398 399 /* 400 * Find out how big the kernel's virtual address 401 * space is. The *$#@$ prom loses this info 402 */ 403 if ((vmemh = OF_finddevice("/virtual-memory")) == -1) { 404 prom_printf("no virtual-memory?"); 405 OF_exit(); 406 } 407 bzero((caddr_t)memlist, sizeof(memlist)); 408 if (OF_getprop(vmemh, "available", memlist, sizeof(memlist)) <= 0) { 409 prom_printf("no vmemory avail?"); 410 OF_exit(); 411 } 412 413 #ifdef DEBUG 414 if (pmapdebug & PDB_BOOT) { 415 /* print out mem list */ 416 prom_printf("Available virtual memory:\r\n"); 417 for (mp = memlist; mp->size; mp++) { 418 prom_printf("memlist start %p size %lx\r\n", 419 (void *)(u_long)mp->start, 420 (u_long)mp->size); 421 } 422 prom_printf("End of available virtual memory\r\n"); 423 } 424 #endif 425 /* 426 * Get hold or the message buffer. 
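 *
 * Roughly: claim the fixed MSGBUF_VA from the PROM, ask the PROM for the
 * physical pages with prom_get_msgbuf(), map them with prom_map_phys() so
 * the buffer is usable immediately, and hand the region to initmsgbuf().
 * A permanent mapping is entered into the kernel pmap further down, once
 * pm_segs exists.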
427 */ 428 msgbufp = (struct msgbuf *)(vaddr_t)MSGBUF_VA; 429 /* XXXXX -- increase msgbufsiz for uvmhist printing */ 430 msgbufsiz = 4*NBPG /* round_page(sizeof(struct msgbuf)) */; 431 BDPRINTF(PDB_BOOT, ("Trying to allocate msgbuf at %lx, size %lx\r\n", 432 (long)msgbufp, (long)msgbufsiz)); 433 if ((long)msgbufp != 434 (long)(phys_msgbuf = prom_claim_virt((vaddr_t)msgbufp, msgbufsiz))) 435 prom_printf( 436 "cannot get msgbuf VA, msgbufp=%p, phys_msgbuf=%lx\r\n", 437 (void *)msgbufp, (long)phys_msgbuf); 438 phys_msgbuf = prom_get_msgbuf(msgbufsiz, MMU_PAGE_ALIGN); 439 BDPRINTF(PDB_BOOT, 440 ("We should have the memory at %lx, let's map it in\r\n", 441 phys_msgbuf)); 442 if (prom_map_phys(phys_msgbuf, msgbufsiz, (vaddr_t)msgbufp, 443 -1/* sunos does this */) == -1) 444 prom_printf("Failed to map msgbuf\r\n"); 445 else 446 BDPRINTF(PDB_BOOT, ("msgbuf mapped at %p\r\n", 447 (void *)msgbufp)); 448 msgbufmapped = 1; /* enable message buffer */ 449 initmsgbuf((caddr_t)msgbufp, msgbufsiz); 450 451 /* 452 * Record kernel mapping -- we will map these with a permanent 4MB 453 * TLB entry when we initialize the CPU later. 454 */ 455 BDPRINTF(PDB_BOOT, ("translating kernelstart %p\r\n", 456 (void *)kernelstart)); 457 ktext = kernelstart; 458 ktextp = prom_vtop(kernelstart); 459 460 kdata = (vaddr_t)data_start; 461 kdatap = prom_vtop(kdata); 462 ekdata = (vaddr_t)end; 463 464 /* 465 * Find the real size of the kernel. Locate the smallest starting 466 * address > kernelstart. 467 */ 468 for (mp1 = mp = memlist; mp->size; mp++) { 469 /* 470 * Check whether this region is at the end of the kernel. 471 */ 472 if (mp->start >= ekdata && (mp1->start < ekdata || 473 mp1->start > mp->start)) 474 mp1 = mp; 475 } 476 if (mp1->start < kdata) 477 prom_printf("Kernel at end of vmem???\r\n"); 478 479 BDPRINTF(PDB_BOOT1, 480 ("Kernel data is mapped at %lx, next free seg: %lx, %lx\r\n", 481 (long)kdata, (u_long)mp1->start, (u_long)mp1->size)); 482 483 /* 484 * We save where we can start allocating memory. 485 */ 486 firstaddr = (ekdata + 07) & ~ 07; /* Longword align */ 487 488 /* 489 * We reserve 100K to grow. 490 */ 491 ekdata += 100*KB; 492 493 /* 494 * And set the end of the data segment to the end of what our 495 * bootloader allocated for us, if we still fit in there. 496 */ 497 if (ekdata < mp1->start) 498 ekdata = mp1->start; 499 500 #define valloc(name, type, num) (name) = (type *)firstaddr; firstaddr += (num) 501 502 /* 503 * Since we can't always give the loader the hint to align us on a 4MB 504 * boundary, we will need to do the alignment ourselves. First 505 * allocate a new 4MB aligned segment for the kernel, then map it 506 * in, copy the kernel over, swap mappings, then finally, free the 507 * old kernel. Then we can continue with this. 508 * 509 * We'll do the data segment up here since we know how big it is. 510 * We'll do the text segment after we've read in the PROM translations 511 * so we can figure out its size. 512 * 513 * The ctxbusy table takes about 64KB, the TSB up to 32KB, and the 514 * rest should be less than 1K, so 100KB extra should be plenty. 515 */ 516 kdsize = round_page(ekdata - kdata); 517 BDPRINTF(PDB_BOOT1, ("Kernel data size is %lx\r\n", (long)kdsize)); 518 519 if ((kdatap & (4*MEG-1)) == 0) { 520 /* We were at a 4MB boundary -- claim the rest */ 521 psize_t szdiff = (4*MEG - kdsize) & (4*MEG - 1); 522 523 BDPRINTF(PDB_BOOT1, ("Need to extend dseg by %lx\r\n", 524 (long)szdiff)); 525 if (szdiff) { 526 /* Claim the rest of the physical page. 
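 * For example, a kdsize of 3MB gives a szdiff of 1MB, growing the data
 * segment to the next 4MB boundary; if kdsize is already a multiple of
 * 4MB the mask above makes szdiff 0 and nothing extra is claimed.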
*/ 527 newkp = kdatap + kdsize; 528 newkv = kdata + kdsize; 529 if (newkp != prom_claim_phys(newkp, szdiff)) { 530 prom_printf("pmap_bootstrap: could not claim " 531 "physical dseg extension " 532 "at %lx size %lx\r\n", 533 newkp, szdiff); 534 goto remap_data; 535 } 536 537 /* And the rest of the virtual page. */ 538 if (prom_claim_virt(newkv, szdiff) != newkv) 539 prom_printf("pmap_bootstrap: could not claim " 540 "virtual dseg extension " 541 "at size %lx\r\n", newkv, szdiff); 542 543 /* Make sure all 4MB are mapped */ 544 prom_map_phys(newkp, szdiff, newkv, -1); 545 } 546 } else { 547 psize_t sz; 548 remap_data: 549 /* 550 * Either we're not at a 4MB boundary or we can't get the rest 551 * of the 4MB extension. We need to move the data segment. 552 * Leave 1MB of extra fiddle space in the calculations. 553 */ 554 555 sz = (kdsize + 4*MEG - 1) & ~(4*MEG-1); 556 BDPRINTF(PDB_BOOT1, 557 ("Allocating new %lx kernel data at 4MB boundary\r\n", 558 (u_long)sz)); 559 if ((newkp = prom_alloc_phys(sz, 4*MEG)) == (paddr_t)-1 ) { 560 prom_printf("Cannot allocate new kernel\r\n"); 561 OF_exit(); 562 } 563 BDPRINTF(PDB_BOOT1, ("Allocating new va for buffer at %llx\r\n", 564 (u_int64_t)newkp)); 565 if ((newkv = (vaddr_t)prom_alloc_virt(sz, 8)) == 566 (vaddr_t)-1) { 567 prom_printf("Cannot allocate new kernel va\r\n"); 568 OF_exit(); 569 } 570 BDPRINTF(PDB_BOOT1, ("Mapping in buffer %llx at %llx\r\n", 571 (u_int64_t)newkp, (u_int64_t)newkv)); 572 prom_map_phys(newkp, sz, (vaddr_t)newkv, -1); 573 BDPRINTF(PDB_BOOT1, ("Copying %ld bytes kernel data...", 574 kdsize)); 575 bzero((void *)newkv, sz); 576 bcopy((void *)kdata, (void *)newkv, kdsize); 577 BDPRINTF(PDB_BOOT1, ("done. Swapping maps..unmap new\r\n")); 578 prom_unmap_virt((vaddr_t)newkv, sz); 579 BDPRINTF(PDB_BOOT, ("remap old ")); 580 #if 0 581 /* 582 * calling the prom will probably require reading part of the 583 * data segment so we can't do this. */ 584 prom_unmap_virt((vaddr_t)kdatap, kdsize); 585 #endif 586 prom_map_phys(newkp, sz, kdata, -1); 587 /* 588 * we will map in 4MB, more than we allocated, to allow 589 * further allocation 590 */ 591 BDPRINTF(PDB_BOOT1, ("free old\r\n")); 592 prom_free_phys(kdatap, kdsize); 593 kdatap = newkp; 594 BDPRINTF(PDB_BOOT1, 595 ("pmap_bootstrap: firstaddr is %lx virt (%lx phys)" 596 "avail for kernel\r\n", (u_long)firstaddr, 597 (u_long)prom_vtop(firstaddr))); 598 } 599 600 /* 601 * Find out how much RAM we have installed. 602 */ 603 BDPRINTF(PDB_BOOT, ("pmap_bootstrap: getting phys installed\r\n")); 604 if ((memh = OF_finddevice("/memory")) == -1) { 605 prom_printf("no memory?"); 606 OF_exit(); 607 } 608 memsize = OF_getproplen(memh, "reg") + 2 * sizeof(struct mem_region); 609 valloc(mem, struct mem_region, memsize); 610 bzero((caddr_t)mem, memsize); 611 if (OF_getprop(memh, "reg", mem, memsize) <= 0) { 612 prom_printf("no memory installed?"); 613 OF_exit(); 614 } 615 616 #ifdef DEBUG 617 if (pmapdebug & PDB_BOOT1) { 618 /* print out mem list */ 619 prom_printf("Installed physical memory:\r\n"); 620 for (mp = mem; mp->size; mp++) { 621 prom_printf("memlist start %lx size %lx\r\n", 622 (u_long)mp->start, (u_long)mp->size); 623 } 624 } 625 #endif 626 BDPRINTF(PDB_BOOT1, ("Calculating physmem:")); 627 628 for (mp = mem; mp->size; mp++) 629 physmem += atop(mp->size); 630 BDPRINTF(PDB_BOOT1, (" result %x or %d pages\r\n", 631 (int)physmem, (int)physmem)); 632 633 /* 634 * Calculate approx TSB size. 
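 *
 * For example, with 512MB of memory (65536 8K pages) the loop below
 * (without the SMALL_KERNEL cap) stops at a tsbsize of 3, giving TSBENTS
 * of 4096 and a 64KB TSB (TSBSIZE) for each of the data and instruction
 * TSBs.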
635 */ 636 tsbsize = 0; 637 #ifdef SMALL_KERNEL 638 while ((physmem >> tsbsize) > atop(64 * MEG) && tsbsize < 2) 639 #else 640 while ((physmem >> tsbsize) > atop(64 * MEG) && tsbsize < 7) 641 #endif 642 tsbsize++; 643 644 /* 645 * Save the prom translations 646 */ 647 sz = OF_getproplen(vmemh, "translations"); 648 valloc(prom_map, struct prom_map, sz); 649 if (OF_getprop(vmemh, "translations", (void *)prom_map, sz) <= 0) { 650 prom_printf("no translations installed?"); 651 OF_exit(); 652 } 653 prom_map_size = sz / sizeof(struct prom_map); 654 #ifdef DEBUG 655 if (pmapdebug & PDB_BOOT) { 656 /* print out mem list */ 657 prom_printf("Prom xlations:\r\n"); 658 for (i = 0; i < prom_map_size; i++) { 659 prom_printf("start %016lx size %016lx tte %016lx\r\n", 660 (u_long)prom_map[i].vstart, 661 (u_long)prom_map[i].vsize, 662 (u_long)prom_map[i].tte); 663 } 664 prom_printf("End of prom xlations\r\n"); 665 } 666 #endif 667 /* 668 * Hunt for the kernel text segment and figure out it size and 669 * alignment. 670 */ 671 ktsize = 0; 672 for (i = 0; i < prom_map_size; i++) 673 if (prom_map[i].vstart == ktext + ktsize) 674 ktsize += prom_map[i].vsize; 675 if (ktsize == 0) 676 panic("No kernel text segment!"); 677 ektext = ktext + ktsize; 678 679 if (ktextp & (4*MEG-1)) { 680 /* Kernel text is not 4MB aligned -- need to fix that */ 681 BDPRINTF(PDB_BOOT1, 682 ("Allocating new %lx kernel text at 4MB boundary\r\n", 683 (u_long)ktsize)); 684 if ((newkp = prom_alloc_phys(ktsize, 4*MEG)) == 0 ) { 685 prom_printf("Cannot allocate new kernel text\r\n"); 686 OF_exit(); 687 } 688 BDPRINTF(PDB_BOOT1, ("Allocating new va for buffer at %llx\r\n", 689 (u_int64_t)newkp)); 690 if ((newkv = (vaddr_t)prom_alloc_virt(ktsize, 8)) == 691 (vaddr_t)-1) { 692 prom_printf("Cannot allocate new kernel text va\r\n"); 693 OF_exit(); 694 } 695 BDPRINTF(PDB_BOOT1, ("Mapping in buffer %lx at %lx\r\n", 696 (u_long)newkp, (u_long)newkv)); 697 prom_map_phys(newkp, ktsize, (vaddr_t)newkv, -1); 698 BDPRINTF(PDB_BOOT1, ("Copying %ld bytes kernel text...", 699 ktsize)); 700 bcopy((void *)ktext, (void *)newkv, 701 ktsize); 702 BDPRINTF(PDB_BOOT1, ("done. Swapping maps..unmap new\r\n")); 703 prom_unmap_virt((vaddr_t)newkv, 4*MEG); 704 BDPRINTF(PDB_BOOT, ("remap old ")); 705 #if 0 706 /* 707 * calling the prom will probably require reading part of the 708 * text segment so we can't do this. 709 */ 710 prom_unmap_virt((vaddr_t)ktextp, ktsize); 711 #endif 712 prom_map_phys(newkp, ktsize, ktext, -1); 713 /* 714 * we will map in 4MB, more than we allocated, to allow 715 * further allocation 716 */ 717 BDPRINTF(PDB_BOOT1, ("free old\r\n")); 718 prom_free_phys(ktextp, ktsize); 719 ktextp = newkp; 720 721 BDPRINTF(PDB_BOOT1, 722 ("pmap_bootstrap: firstaddr is %lx virt (%lx phys)" 723 "avail for kernel\r\n", (u_long)firstaddr, 724 (u_long)prom_vtop(firstaddr))); 725 726 /* 727 * Re-fetch translations -- they've certainly changed. 728 */ 729 if (OF_getprop(vmemh, "translations", (void *)prom_map, sz) <= 730 0) { 731 prom_printf("no translations installed?"); 732 OF_exit(); 733 } 734 #ifdef DEBUG 735 if (pmapdebug & PDB_BOOT) { 736 /* print out mem list */ 737 prom_printf("New prom xlations:\r\n"); 738 for (i = 0; i < prom_map_size; i++) { 739 prom_printf("start %016lx size %016lx tte %016lx\r\n", 740 (u_long)prom_map[i].vstart, 741 (u_long)prom_map[i].vsize, 742 (u_long)prom_map[i].tte); 743 } 744 prom_printf("End of prom xlations\r\n"); 745 } 746 #endif 747 } 748 ektextp = ktextp + ktsize; 749 750 /* 751 * Here's a quick in-lined reverse bubble sort. 
It gets rid of 752 * any translations inside the kernel data VA range. 753 */ 754 for(i = 0; i < prom_map_size; i++) { 755 if (prom_map[i].vstart >= kdata && 756 prom_map[i].vstart <= firstaddr) { 757 prom_map[i].vstart = 0; 758 prom_map[i].vsize = 0; 759 } 760 if (prom_map[i].vstart >= ktext && 761 prom_map[i].vstart <= ektext) { 762 prom_map[i].vstart = 0; 763 prom_map[i].vsize = 0; 764 } 765 for(j = i; j < prom_map_size; j++) { 766 if (prom_map[j].vstart >= kdata && 767 prom_map[j].vstart <= firstaddr) 768 continue; /* this is inside the kernel */ 769 if (prom_map[j].vstart >= ktext && 770 prom_map[j].vstart <= ektext) 771 continue; /* this is inside the kernel */ 772 if (prom_map[j].vstart > prom_map[i].vstart) { 773 struct prom_map tmp; 774 tmp = prom_map[i]; 775 prom_map[i] = prom_map[j]; 776 prom_map[j] = tmp; 777 } 778 } 779 } 780 #ifdef DEBUG 781 if (pmapdebug & PDB_BOOT) { 782 /* print out mem list */ 783 prom_printf("Prom xlations:\r\n"); 784 for (i = 0; i < prom_map_size; i++) { 785 prom_printf("start %016lx size %016lx tte %016lx\r\n", 786 (u_long)prom_map[i].vstart, 787 (u_long)prom_map[i].vsize, 788 (u_long)prom_map[i].tte); 789 } 790 prom_printf("End of prom xlations\r\n"); 791 } 792 #endif 793 794 /* 795 * Allocate a 64KB page for the cpu_info structure now. 796 */ 797 if ((cpu0paddr = prom_alloc_phys(numcpus * 8*NBPG, 8*NBPG)) == 0 ) { 798 prom_printf("Cannot allocate new cpu_info\r\n"); 799 OF_exit(); 800 } 801 802 803 /* 804 * Now the kernel text segment is in its final location we can try to 805 * find out how much memory really is free. 806 */ 807 sz = OF_getproplen(memh, "available") + sizeof(struct mem_region); 808 valloc(orig, struct mem_region, sz); 809 bzero((caddr_t)orig, sz); 810 if (OF_getprop(memh, "available", orig, sz) <= 0) { 811 prom_printf("no available RAM?"); 812 OF_exit(); 813 } 814 #ifdef DEBUG 815 if (pmapdebug & PDB_BOOT1) { 816 /* print out mem list */ 817 prom_printf("Available physical memory:\r\n"); 818 for (mp = orig; mp->size; mp++) { 819 prom_printf("memlist start %lx size %lx\r\n", 820 (u_long)mp->start, (u_long)mp->size); 821 } 822 prom_printf("End of available physical memory\r\n"); 823 } 824 #endif 825 valloc(avail, struct mem_region, sz); 826 bzero((caddr_t)avail, sz); 827 for (pcnt = 0, mp = orig, mp1 = avail; (mp1->size = mp->size); 828 mp++, mp1++) { 829 mp1->start = mp->start; 830 pcnt++; 831 } 832 833 /* 834 * Allocate and initialize a context table 835 */ 836 numctx = maxctx; 837 valloc(ctxbusy, paddr_t, CTXSIZE); 838 bzero((caddr_t)ctxbusy, CTXSIZE); 839 840 /* 841 * Allocate our TSB. 842 * 843 * We will use the left over space to flesh out the kernel pmap. 
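 *
 * Note that firstaddr is first rounded up to a TSBSIZE boundary: the TSB
 * must be aligned to its own size so that the TSB base and the entry
 * index can be combined by simple masking.  With a tsbsize of 3 that is
 * a 64KB alignment.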
844 */ 845 BDPRINTF(PDB_BOOT1, ("firstaddr before TSB=%lx\r\n", 846 (u_long)firstaddr)); 847 firstaddr = ((firstaddr + TSBSIZE - 1) & ~(TSBSIZE-1)); 848 #ifdef DEBUG 849 i = (firstaddr + (NBPG-1)) & ~(NBPG-1); /* First, page align */ 850 if ((int)firstaddr < i) { 851 prom_printf("TSB alloc fixup failed\r\n"); 852 prom_printf("frobbed i, firstaddr before TSB=%x, %lx\r\n", 853 (int)i, (u_long)firstaddr); 854 panic("TSB alloc"); 855 OF_exit(); 856 } 857 #endif 858 BDPRINTF(PDB_BOOT, ("frobbed i, firstaddr before TSB=%x, %lx\r\n", 859 (int)i, (u_long)firstaddr)); 860 valloc(tsb_dmmu, pte_t, TSBSIZE); 861 bzero(tsb_dmmu, TSBSIZE); 862 valloc(tsb_immu, pte_t, TSBSIZE); 863 bzero(tsb_immu, TSBSIZE); 864 865 BDPRINTF(PDB_BOOT1, ("firstaddr after TSB=%lx\r\n", (u_long)firstaddr)); 866 BDPRINTF(PDB_BOOT1, ("TSB allocated at %p size %08x\r\n", (void *)tsb_dmmu, 867 (int)TSBSIZE)); 868 869 #ifdef SUN4V 870 if (CPU_ISSUN4V) { 871 valloc(tsb_desc, struct tsb_desc, sizeof(struct tsb_desc)); 872 bzero(tsb_desc, sizeof(struct tsb_desc)); 873 tsb_desc->td_idxpgsz = 0; 874 tsb_desc->td_assoc = 1; 875 tsb_desc->td_size = TSBENTS; 876 tsb_desc->td_ctxidx = -1; 877 tsb_desc->td_pgsz = 0xf; 878 tsb_desc->td_pa = (paddr_t)tsb_dmmu + kdatap - kdata; 879 } 880 #endif 881 882 first_phys_addr = mem->start; 883 BDPRINTF(PDB_BOOT1, ("firstaddr after pmap=%08lx\r\n", 884 (u_long)firstaddr)); 885 886 /* 887 * Page align all regions. 888 * Non-page memory isn't very interesting to us. 889 * Also, sort the entries for ascending addresses. 890 * 891 * And convert from virtual to physical addresses. 892 */ 893 894 BDPRINTF(PDB_BOOT, ("kernel virtual size %08lx - %08lx\r\n", 895 (u_long)kernelstart, (u_long)firstaddr)); 896 kdata = kdata & ~PGOFSET; 897 ekdata = firstaddr; 898 ekdata = (ekdata + PGOFSET) & ~PGOFSET; 899 BDPRINTF(PDB_BOOT1, ("kernel virtual size %08lx - %08lx\r\n", 900 (u_long)kernelstart, (u_long)kernelend)); 901 ekdatap = ekdata - kdata + kdatap; 902 /* Switch from vaddrs to paddrs */ 903 if(ekdatap > (kdatap + 4*MEG)) { 904 prom_printf("Kernel size exceeds 4MB\r\n"); 905 } 906 907 #ifdef DEBUG 908 if (pmapdebug & PDB_BOOT1) { 909 /* print out mem list */ 910 prom_printf("Available %lx physical memory before cleanup:\r\n", 911 (u_long)avail); 912 for (mp = avail; mp->size; mp++) { 913 prom_printf("memlist start %lx size %lx\r\n", 914 (u_long)mp->start, 915 (u_long)mp->size); 916 } 917 prom_printf("End of available physical memory before cleanup\r\n"); 918 prom_printf("kernel physical text size %08lx - %08lx\r\n", 919 (u_long)ktextp, (u_long)ektextp); 920 prom_printf("kernel physical data size %08lx - %08lx\r\n", 921 (u_long)kdatap, (u_long)ekdatap); 922 } 923 #endif 924 /* 925 * Here's a another quick in-lined bubble sort. 926 */ 927 for (i = 0; i < pcnt; i++) { 928 for (j = i; j < pcnt; j++) { 929 if (avail[j].start < avail[i].start) { 930 struct mem_region tmp; 931 tmp = avail[i]; 932 avail[i] = avail[j]; 933 avail[j] = tmp; 934 } 935 } 936 } 937 938 /* Throw away page zero if we have it. */ 939 if (avail->start == 0) { 940 avail->start += NBPG; 941 avail->size -= NBPG; 942 } 943 /* 944 * Now we need to remove the area we valloc'ed from the available 945 * memory lists. (NB: we may have already alloc'ed the entire space). 946 */ 947 npgs = 0; 948 for (mp = avail; mp->size; mp++) { 949 /* 950 * Check whether this region holds all of the kernel. 
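 *
 * If it does, the region is split below: the part under the kernel keeps
 * [start, kdatap) and the leftover above roundup(ekdatap, 4MB) is appended
 * to avail[] as a new entry, since the physical pages up to that boundary
 * back the locked 4MB kernel data mapping.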
951 */ 952 s = mp->start + mp->size; 953 if (mp->start < kdatap && s > roundup(ekdatap, 4*MEG)) { 954 avail[pcnt].start = roundup(ekdatap, 4*MEG); 955 avail[pcnt++].size = s - kdatap; 956 mp->size = kdatap - mp->start; 957 } 958 /* 959 * Look whether this regions starts within the kernel. 960 */ 961 if (mp->start >= kdatap && 962 mp->start < roundup(ekdatap, 4*MEG)) { 963 s = ekdatap - mp->start; 964 if (mp->size > s) 965 mp->size -= s; 966 else 967 mp->size = 0; 968 mp->start = roundup(ekdatap, 4*MEG); 969 } 970 /* 971 * Now look whether this region ends within the kernel. 972 */ 973 s = mp->start + mp->size; 974 if (s > kdatap && s < roundup(ekdatap, 4*MEG)) 975 mp->size -= s - kdatap; 976 /* 977 * Now page align the start of the region. 978 */ 979 s = mp->start % NBPG; 980 if (mp->size >= s) { 981 mp->size -= s; 982 mp->start += s; 983 } 984 /* 985 * And now align the size of the region. 986 */ 987 mp->size -= mp->size % NBPG; 988 /* 989 * Check whether some memory is left here. 990 */ 991 if (mp->size == 0) { 992 bcopy(mp + 1, mp, 993 (pcnt - (mp - avail)) * sizeof *mp); 994 pcnt--; 995 mp--; 996 continue; 997 } 998 s = mp->start; 999 sz = mp->size; 1000 npgs += atop(sz); 1001 for (mp1 = avail; mp1 < mp; mp1++) 1002 if (s < mp1->start) 1003 break; 1004 if (mp1 < mp) { 1005 bcopy(mp1, mp1 + 1, (char *)mp - (char *)mp1); 1006 mp1->start = s; 1007 mp1->size = sz; 1008 } 1009 /* 1010 * In future we should be able to specify both allocated 1011 * and free. 1012 */ 1013 uvm_page_physload( 1014 atop(mp->start), 1015 atop(mp->start+mp->size), 1016 atop(mp->start), 1017 atop(mp->start+mp->size), 0); 1018 } 1019 1020 #if 0 1021 /* finally, free up any space that valloc did not use */ 1022 prom_unmap_virt((vaddr_t)ekdata, roundup(ekdata, 4*MEG) - ekdata); 1023 if (ekdatap < roundup(kdatap, 4*MEG))) { 1024 uvm_page_physload(atop(ekdatap), 1025 atop(roundup(ekdatap, (4*MEG))), 1026 atop(ekdatap), 1027 atop(roundup(ekdatap, (4*MEG))), 0); 1028 } 1029 #endif 1030 1031 #ifdef DEBUG 1032 if (pmapdebug & PDB_BOOT) { 1033 /* print out mem list */ 1034 prom_printf("Available physical memory after cleanup:\r\n"); 1035 for (mp = avail; mp->size; mp++) { 1036 prom_printf("avail start %lx size %lx\r\n", 1037 (long)mp->start, (long)mp->size); 1038 } 1039 prom_printf("End of available physical memory after cleanup\r\n"); 1040 } 1041 #endif 1042 /* 1043 * Allocate and clear out pmap_kernel()->pm_segs[] 1044 */ 1045 pmap_kernel()->pm_refs = 1; 1046 pmap_kernel()->pm_ctx = 0; 1047 { 1048 paddr_t newp; 1049 1050 do { 1051 pmap_get_page(&newp, NULL, pmap_kernel()); 1052 } while (!newp); /* Throw away page zero */ 1053 pmap_kernel()->pm_segs=(int64_t *)(u_long)newp; 1054 pmap_kernel()->pm_physaddr = newp; 1055 /* mark kernel context as busy */ 1056 ((paddr_t*)ctxbusy)[0] = pmap_kernel()->pm_physaddr; 1057 } 1058 /* 1059 * finish filling out kernel pmap. 
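 *
 * The kernel always runs in context 0, so the code above points
 * ctxbusy[0] at the segment table we just allocated; the TLB miss
 * handlers locate a pmap's psegs roughly as
 *
 *	pseg = ctxbusy[context];	(physical addresses, see the
 *					 ctxbusy comment earlier)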
1060 */ 1061 1062 BDPRINTF(PDB_BOOT, ("pmap_kernel()->pm_physaddr = %lx\r\n", 1063 (long)pmap_kernel()->pm_physaddr)); 1064 /* 1065 * Tell pmap about our mesgbuf -- Hope this works already 1066 */ 1067 #ifdef DEBUG 1068 BDPRINTF(PDB_BOOT1, ("Calling consinit()\r\n")); 1069 if (pmapdebug & PDB_BOOT1) consinit(); 1070 BDPRINTF(PDB_BOOT1, ("Inserting mesgbuf into pmap_kernel()\r\n")); 1071 #endif 1072 /* it's not safe to call pmap_enter so we need to do this ourselves */ 1073 va = (vaddr_t)msgbufp; 1074 prom_map_phys(phys_msgbuf, msgbufsiz, (vaddr_t)msgbufp, -1); 1075 while (msgbufsiz) { 1076 data = TSB_DATA(0 /* global */, 1077 PGSZ_8K, 1078 phys_msgbuf, 1079 1 /* priv */, 1080 1 /* Write */, 1081 1 /* Cacheable */, 1082 FORCE_ALIAS /* ALIAS -- Disable D$ */, 1083 1 /* valid */, 1084 0 /* IE */); 1085 pmap_enter_kpage(va, data); 1086 va += PAGE_SIZE; 1087 msgbufsiz -= PAGE_SIZE; 1088 phys_msgbuf += PAGE_SIZE; 1089 } 1090 BDPRINTF(PDB_BOOT1, ("Done inserting mesgbuf into pmap_kernel()\r\n")); 1091 1092 BDPRINTF(PDB_BOOT1, ("Inserting PROM mappings into pmap_kernel()\r\n")); 1093 data = 0; 1094 if (CPU_ISSUN4U || CPU_ISSUN4US) 1095 data = SUN4U_TLB_EXEC; 1096 for (i = 0; i < prom_map_size; i++) { 1097 if (prom_map[i].vstart && ((prom_map[i].vstart>>32) == 0)) { 1098 for (j = 0; j < prom_map[i].vsize; j += NBPG) { 1099 int k; 1100 1101 for (k = 0; page_size_map[k].mask; k++) { 1102 if (((prom_map[i].vstart | 1103 prom_map[i].tte) & 1104 page_size_map[k].mask) == 0 && 1105 page_size_map[k].mask < 1106 prom_map[i].vsize) 1107 break; 1108 } 1109 /* Enter PROM map into pmap_kernel() */ 1110 pmap_enter_kpage(prom_map[i].vstart + j, 1111 (prom_map[i].tte + j)|data| 1112 page_size_map[k].code); 1113 } 1114 } 1115 } 1116 BDPRINTF(PDB_BOOT1, ("Done inserting PROM mappings into pmap_kernel()\r\n")); 1117 1118 /* 1119 * Fix up start of kernel heap. 1120 */ 1121 vmmap = (vaddr_t)roundup(ekdata, 4*MEG); 1122 /* Let's keep 1 page of redzone after the kernel */ 1123 vmmap += NBPG; 1124 { 1125 extern vaddr_t u0[2]; 1126 extern struct pcb* proc0paddr; 1127 extern void main(void); 1128 paddr_t pa; 1129 1130 /* Initialize all the pointers to u0 */ 1131 u0[0] = vmmap; 1132 /* Allocate some VAs for u0 */ 1133 u0[1] = vmmap + 2*USPACE; 1134 1135 BDPRINTF(PDB_BOOT1, 1136 ("Inserting stack 0 into pmap_kernel() at %p\r\n", 1137 vmmap)); 1138 1139 while (vmmap < u0[1]) { 1140 int64_t data; 1141 1142 pmap_get_page(&pa, NULL, pmap_kernel()); 1143 prom_map_phys(pa, NBPG, vmmap, -1); 1144 data = TSB_DATA(0 /* global */, 1145 PGSZ_8K, 1146 pa, 1147 1 /* priv */, 1148 1 /* Write */, 1149 1 /* Cacheable */, 1150 FORCE_ALIAS /* ALIAS -- Disable D$ */, 1151 1 /* valid */, 1152 0 /* IE */); 1153 pmap_enter_kpage(vmmap, data); 1154 vmmap += NBPG; 1155 } 1156 BDPRINTF(PDB_BOOT1, 1157 ("Done inserting stack 0 into pmap_kernel()\r\n")); 1158 1159 /* Now map in and initialize our cpu_info structure */ 1160 #ifdef DIAGNOSTIC 1161 vmmap += NBPG; /* redzone -- XXXX do we need one? */ 1162 #endif 1163 intstk = vmmap = roundup(vmmap, 64*KB); 1164 cpus = (struct cpu_info *)(intstk + CPUINFO_VA - INTSTACK); 1165 1166 BDPRINTF(PDB_BOOT1, 1167 ("Inserting cpu_info into pmap_kernel() at %p\r\n", 1168 cpus)); 1169 /* Now map in all 8 pages of cpu_info */ 1170 pa = cpu0paddr; 1171 prom_map_phys(pa, 64*KB, vmmap, -1); 1172 /* 1173 * Also map it in as the interrupt stack. 1174 * This lets the PROM see this if needed. 1175 * 1176 * XXXX locore.s does not flush these mappings 1177 * before installing the locked TTE. 
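 *
 * The locked 64KB TTE for the interrupt stack itself is installed
 * later, in sun4u_bootstrap_cpu()/sun4v_bootstrap_cpu() below.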
1178 */ 1179 prom_map_phys(pa, 64*KB, CPUINFO_VA, -1); 1180 for (i=0; i<8; i++) { 1181 int64_t data; 1182 1183 data = TSB_DATA(0 /* global */, 1184 PGSZ_8K, 1185 pa, 1186 1 /* priv */, 1187 1 /* Write */, 1188 1 /* Cacheable */, 1189 FORCE_ALIAS /* ALIAS -- Disable D$ */, 1190 1 /* valid */, 1191 0 /* IE */); 1192 pmap_enter_kpage(vmmap, data); 1193 vmmap += NBPG; 1194 pa += NBPG; 1195 } 1196 BDPRINTF(PDB_BOOT1, ("Initializing cpu_info\r\n")); 1197 1198 /* Initialize our cpu_info structure */ 1199 bzero((void *)intstk, 8*NBPG); 1200 cpus->ci_self = cpus; 1201 cpus->ci_next = NULL; /* Redundant, I know. */ 1202 cpus->ci_curproc = &proc0; 1203 cpus->ci_cpcb = (struct pcb *)u0[0]; /* Need better source */ 1204 cpus->ci_upaid = cpu_myid(); 1205 cpus->ci_number = 0; 1206 cpus->ci_flags = CPUF_RUNNING; 1207 cpus->ci_fpproc = NULL; 1208 cpus->ci_spinup = main; /* Call main when we're running. */ 1209 cpus->ci_initstack = (void *)u0[1]; 1210 cpus->ci_paddr = cpu0paddr; 1211 #ifdef SUN4V 1212 cpus->ci_mmfsa = cpu0paddr; 1213 #endif 1214 proc0paddr = cpus->ci_cpcb; 1215 1216 cpu0paddr += 64 * KB; 1217 1218 /* The rest will be done at CPU attach time. */ 1219 BDPRINTF(PDB_BOOT1, 1220 ("Done inserting cpu_info into pmap_kernel()\r\n")); 1221 } 1222 1223 vmmap = (vaddr_t)reserve_dumppages((caddr_t)(u_long)vmmap); 1224 BDPRINTF(PDB_BOOT1, ("Finished pmap_bootstrap()\r\n")); 1225 1226 pmap_bootstrap_cpu(cpus->ci_paddr); 1227 } 1228 1229 void sun4u_bootstrap_cpu(paddr_t); 1230 void sun4v_bootstrap_cpu(paddr_t); 1231 1232 void 1233 pmap_bootstrap_cpu(paddr_t intstack) 1234 { 1235 if (CPU_ISSUN4V) 1236 sun4v_bootstrap_cpu(intstack); 1237 else 1238 sun4u_bootstrap_cpu(intstack); 1239 } 1240 1241 extern void sun4u_set_tsbs(void); 1242 1243 void 1244 sun4u_bootstrap_cpu(paddr_t intstack) 1245 { 1246 u_int64_t data; 1247 paddr_t pa; 1248 vaddr_t va; 1249 int index; 1250 int impl; 1251 1252 impl = (getver() & VER_IMPL) >> VER_IMPL_SHIFT; 1253 1254 /* 1255 * Establish the 4MB locked mappings for kernel data and text. 1256 * 1257 * The text segment needs to be mapped into the DTLB too, 1258 * because of .rodata. 1259 */ 1260 1261 index = 15; /* XXX */ 1262 for (va = ktext, pa = ktextp; va < ektext; va += 4*MEG, pa += 4*MEG) { 1263 data = SUN4U_TSB_DATA(0, PGSZ_4M, pa, 1, 0, 1, FORCE_ALIAS, 1, 0); 1264 data |= SUN4U_TLB_L; 1265 prom_itlb_load(index, data, va); 1266 prom_dtlb_load(index, data, va); 1267 index--; 1268 } 1269 1270 for (va = kdata, pa = kdatap; va < ekdata; va += 4*MEG, pa += 4*MEG) { 1271 data = SUN4U_TSB_DATA(0, PGSZ_4M, pa, 1, 1, 1, FORCE_ALIAS, 1, 0); 1272 data |= SUN4U_TLB_L; 1273 prom_dtlb_load(index, data, va); 1274 index--; 1275 } 1276 1277 #ifdef MULTIPROCESSOR 1278 if (impl >= IMPL_OLYMPUS_C && impl <= IMPL_JUPITER) { 1279 /* 1280 * On SPARC64-VI and SPARC64-VII processors, the MMU is 1281 * shared between threads, so we can't establish a locked 1282 * mapping for the interrupt stack since the mappings would 1283 * conflict. Instead we stick the address in a scratch 1284 * register, like we do for sun4v. 
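 *
 * Concretely, the code below pulls ci_self out of the physical copy
 * of cpu_info with ldxa(..., ASI_PHYS_CACHED) and parks it in
 * scratchpad register 0 with stxa(0x00, ASI_SCRATCH, ...), where
 * later code can read it back without going through the MMU.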
1285 */ 1286 pa = intstack + (CPUINFO_VA - INTSTACK); 1287 pa += offsetof(struct cpu_info, ci_self); 1288 va = ldxa(pa, ASI_PHYS_CACHED); 1289 stxa(0x00, ASI_SCRATCH, va); 1290 1291 if ((CPU_JUPITERID % 2) == 1) 1292 index--; 1293 1294 data = SUN4U_TSB_DATA(0, PGSZ_64K, intstack, 1, 1, 1, FORCE_ALIAS, 1, 0); 1295 data |= SUN4U_TLB_L; 1296 prom_dtlb_load(index, data, va - (CPUINFO_VA - INTSTACK)); 1297 1298 sun4u_set_tsbs(); 1299 return; 1300 } 1301 #endif 1302 1303 /* 1304 * Establish the 64KB locked mapping for the interrupt stack. 1305 */ 1306 1307 data = SUN4U_TSB_DATA(0, PGSZ_64K, intstack, 1, 1, 1, FORCE_ALIAS, 1, 0); 1308 data |= SUN4U_TLB_L; 1309 prom_dtlb_load(index, data, INTSTACK); 1310 1311 sun4u_set_tsbs(); 1312 } 1313 1314 void 1315 sun4v_bootstrap_cpu(paddr_t intstack) 1316 { 1317 #ifdef SUN4V 1318 u_int64_t data; 1319 paddr_t pa; 1320 vaddr_t va; 1321 int err; 1322 1323 /* 1324 * Establish the 4MB locked mappings for kernel data and text. 1325 * 1326 * The text segment needs to be mapped into the DTLB too, 1327 * because of .rodata. 1328 */ 1329 1330 for (va = ktext, pa = ktextp; va < ektext; va += 4*MEG, pa += 4*MEG) { 1331 data = SUN4V_TSB_DATA(0, PGSZ_4M, pa, 1, 0, 1, 0, 1, 0); 1332 data |= SUN4V_TLB_X; 1333 err = hv_mmu_map_perm_addr(va, data, MAP_ITLB|MAP_DTLB); 1334 if (err != H_EOK) 1335 prom_printf("err: %d\r\n", err); 1336 } 1337 1338 for (va = kdata, pa = kdatap; va < ekdata; va += 4*MEG, pa += 4*MEG) { 1339 data = SUN4V_TSB_DATA(0, PGSZ_4M, pa, 1, 1, 1, 0, 1, 0); 1340 err = hv_mmu_map_perm_addr(va, data, MAP_DTLB); 1341 if (err != H_EOK) 1342 prom_printf("err: %d\r\n", err); 1343 } 1344 1345 #ifndef MULTIPROCESSOR 1346 /* 1347 * Establish the 64KB locked mapping for the interrupt stack. 1348 */ 1349 data = SUN4V_TSB_DATA(0, PGSZ_64K, intstack, 1, 1, 1, 0, 1, 0); 1350 err = hv_mmu_map_perm_addr(INTSTACK, data, MAP_DTLB); 1351 if (err != H_EOK) 1352 prom_printf("err: %d\r\n", err); 1353 #else 1354 pa = intstack + (CPUINFO_VA - INTSTACK); 1355 pa += offsetof(struct cpu_info, ci_self); 1356 stxa(0x00, ASI_SCRATCHPAD, ldxa(pa, ASI_PHYS_CACHED)); 1357 #endif 1358 1359 stxa(0x10, ASI_SCRATCHPAD, intstack + (CPUINFO_VA - INTSTACK)); 1360 1361 err = hv_mmu_tsb_ctx0(1, (paddr_t)tsb_desc + kdatap - kdata); 1362 if (err != H_EOK) 1363 prom_printf("err: %d\r\n", err); 1364 err = hv_mmu_tsb_ctxnon0(1, (paddr_t)tsb_desc + kdatap - kdata); 1365 if (err != H_EOK) 1366 prom_printf("err: %d\r\n", err); 1367 #endif 1368 } 1369 1370 /* 1371 * Initialize anything else for pmap handling. 1372 * Called during uvm_init(). 1373 */ 1374 void 1375 pmap_init(void) 1376 { 1377 BDPRINTF(PDB_BOOT1, ("pmap_init()\r\n")); 1378 if (PAGE_SIZE != NBPG) 1379 panic("pmap_init: CLSIZE!=1"); 1380 1381 /* Setup a pool for additional pvlist structures */ 1382 pool_init(&pv_pool, sizeof(struct pv_entry), 0, 0, 0, "pv_entry", NULL); 1383 pool_setipl(&pv_pool, IPL_VM); 1384 pool_init(&pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl", 1385 &pool_allocator_nointr); 1386 } 1387 1388 /* Start of non-cachable physical memory on UltraSPARC-III. */ 1389 #define VM_MAXPHYS_ADDRESS ((vaddr_t)0x0000040000000000L) 1390 1391 static vaddr_t kbreak; /* End of kernel VA */ 1392 1393 /* 1394 * How much virtual space is available to the kernel? 1395 */ 1396 void 1397 pmap_virtual_space(vaddr_t *start, vaddr_t *end) 1398 { 1399 /* 1400 * Make sure virtual memory and physical memory don't overlap 1401 * to avoid problems with ASI_PHYS_CACHED on UltraSPARC-III. 
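 *
 * VM_MAXPHYS_ADDRESS above is 2^42, the start of non-cacheable physical
 * space on UltraSPARC-III, so starting the allocatable kernel VA range
 * there keeps kernel virtual addresses and cacheable physical addresses
 * disjoint.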
1402 */ 1403 if (vmmap < VM_MAXPHYS_ADDRESS) 1404 vmmap = VM_MAXPHYS_ADDRESS; 1405 1406 /* Reserve two pages for pmap_copy_page && /dev/mem */ 1407 *start = kbreak = (vaddr_t)(vmmap + 2*NBPG); 1408 *end = VM_MAX_KERNEL_ADDRESS; 1409 BDPRINTF(PDB_BOOT1, ("pmap_virtual_space: %x-%x\r\n", *start, *end)); 1410 } 1411 1412 /* 1413 * Preallocate kernel page tables to a specified VA. 1414 * This simply loops through the first TTE for each 1415 * page table from the beginning of the kernel pmap, 1416 * reads the entry, and if the result is 1417 * zero (either invalid entry or no page table) it stores 1418 * a zero there, populating page tables in the process. 1419 * This is not the most efficient technique but i don't 1420 * expect it to be called that often. 1421 */ 1422 vaddr_t 1423 pmap_growkernel(vaddr_t maxkvaddr) 1424 { 1425 paddr_t pg; 1426 struct pmap *pm = pmap_kernel(); 1427 1428 if (maxkvaddr >= VM_MAX_KERNEL_ADDRESS) { 1429 printf("WARNING: cannot extend kernel pmap beyond %p to %p\n", 1430 (void *)VM_MAX_KERNEL_ADDRESS, (void *)maxkvaddr); 1431 return (kbreak); 1432 } 1433 1434 /* Align with the start of a page table */ 1435 for (kbreak &= (-1<<PDSHIFT); kbreak < maxkvaddr; 1436 kbreak += (1<<PDSHIFT)) { 1437 if (pseg_get(pm, kbreak)) 1438 continue; 1439 1440 pg = 0; 1441 while (pseg_set(pm, kbreak, 0, pg) == 1) { 1442 pg = 0; 1443 pmap_get_page(&pg, "growk", pm); 1444 } 1445 1446 } 1447 1448 return (kbreak); 1449 } 1450 1451 /* 1452 * Create and return a physical map. 1453 */ 1454 struct pmap * 1455 pmap_create(void) 1456 { 1457 struct pmap *pm; 1458 1459 pm = pool_get(&pmap_pool, PR_WAITOK | PR_ZERO); 1460 1461 mtx_init(&pm->pm_mtx, IPL_VM); 1462 pm->pm_refs = 1; 1463 pmap_get_page(&pm->pm_physaddr, "pmap_create", pm); 1464 pm->pm_segs = (int64_t *)(u_long)pm->pm_physaddr; 1465 ctx_alloc(pm); 1466 1467 return (pm); 1468 } 1469 1470 /* 1471 * Add a reference to the given pmap. 1472 */ 1473 void 1474 pmap_reference(struct pmap *pm) 1475 { 1476 atomic_inc_int(&pm->pm_refs); 1477 } 1478 1479 /* 1480 * Retire the given pmap from service. 1481 * Should only be called if the map contains no valid mappings. 1482 */ 1483 void 1484 pmap_destroy(struct pmap *pm) 1485 { 1486 if (atomic_dec_int_nv(&pm->pm_refs) == 0) { 1487 pmap_release(pm); 1488 pool_put(&pmap_pool, pm); 1489 } 1490 } 1491 1492 /* 1493 * Release any resources held by the given physical map. 1494 * Called when a pmap initialized by pmap_pinit is being released. 
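 *
 * The page table is a three level tree: pm_segs[] holds STSZ segment
 * entries, each pointing to a directory of PDSZ entries, each of which
 * points to a page table of PTSZ TTEs.  Every level is stored as a
 * physical address and is walked here with ldxa/stxa on ASI_PHYS_CACHED.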
1495 */ 1496 void 1497 pmap_release(struct pmap *pm) 1498 { 1499 int i, j, k; 1500 paddr_t *pdir, *ptbl, tmp; 1501 1502 #ifdef DIAGNOSTIC 1503 if(pm == pmap_kernel()) 1504 panic("pmap_release: releasing pmap_kernel()"); 1505 #endif 1506 1507 mtx_enter(&pm->pm_mtx); 1508 for(i=0; i<STSZ; i++) { 1509 paddr_t psegentp = (paddr_t)(u_long)&pm->pm_segs[i]; 1510 if((pdir = (paddr_t *)(u_long)ldxa((vaddr_t)psegentp, 1511 ASI_PHYS_CACHED))) { 1512 for (k=0; k<PDSZ; k++) { 1513 paddr_t pdirentp = (paddr_t)(u_long)&pdir[k]; 1514 if ((ptbl = (paddr_t *)(u_long)ldxa( 1515 (vaddr_t)pdirentp, ASI_PHYS_CACHED))) { 1516 for (j=0; j<PTSZ; j++) { 1517 int64_t data; 1518 paddr_t pa; 1519 pv_entry_t pv; 1520 1521 data = ldxa((vaddr_t)&ptbl[j], 1522 ASI_PHYS_CACHED); 1523 if (!(data & TLB_V)) 1524 continue; 1525 pa = data & TLB_PA_MASK; 1526 pv = pa_to_pvh(pa); 1527 if (pv != NULL) { 1528 printf("pmap_release: pm=%p page %llx still in use\n", pm, 1529 (unsigned long long)(((u_int64_t)i<<STSHIFT)|((u_int64_t)k<<PDSHIFT)|((u_int64_t)j<<PTSHIFT))); 1530 Debugger(); 1531 } 1532 } 1533 stxa(pdirentp, ASI_PHYS_CACHED, 0); 1534 pmap_free_page((paddr_t)ptbl, pm); 1535 } 1536 } 1537 stxa(psegentp, ASI_PHYS_CACHED, 0); 1538 pmap_free_page((paddr_t)pdir, pm); 1539 } 1540 } 1541 tmp = (paddr_t)(u_long)pm->pm_segs; 1542 pm->pm_segs = NULL; 1543 pmap_free_page(tmp, pm); 1544 mtx_leave(&pm->pm_mtx); 1545 ctx_free(pm); 1546 } 1547 1548 /* 1549 * Copy the range specified by src_addr/len 1550 * from the source map to the range dst_addr/len 1551 * in the destination map. 1552 * 1553 * This routine is only advisory and need not do anything. 1554 */ 1555 void 1556 pmap_copy(struct pmap *dst_pmap, struct pmap *src_pmap, vaddr_t dst_addr, 1557 vsize_t len, vaddr_t src_addr) 1558 { 1559 } 1560 1561 /* 1562 * Garbage collects the physical map system for 1563 * pages which are no longer used. 1564 * Success need not be guaranteed -- that is, there 1565 * may well be pages which are not referenced, but 1566 * others may be collected. 1567 * Called by the pageout daemon when pages are scarce. 1568 */ 1569 void 1570 pmap_collect(struct pmap *pm) 1571 { 1572 #if 1 1573 int i, j, k, n, m, s; 1574 paddr_t *pdir, *ptbl; 1575 /* This is a good place to scan the pmaps for page tables with 1576 * no valid mappings in them and free them. 
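 *
 * The loops below count the valid TTEs in each page table (n) and the
 * live page tables under each directory (m); empty levels are unhooked
 * with stxa() and returned with pmap_free_page().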
*/ 1577 1578 /* NEVER GARBAGE COLLECT THE KERNEL PMAP */ 1579 if (pm == pmap_kernel()) 1580 return; 1581 1582 s = splvm(); 1583 for (i=0; i<STSZ; i++) { 1584 if ((pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i], ASI_PHYS_CACHED))) { 1585 m = 0; 1586 for (k=0; k<PDSZ; k++) { 1587 if ((ptbl = (paddr_t *)(u_long)ldxa((vaddr_t)&pdir[k], ASI_PHYS_CACHED))) { 1588 m++; 1589 n = 0; 1590 for (j=0; j<PTSZ; j++) { 1591 int64_t data = ldxa((vaddr_t)&ptbl[j], ASI_PHYS_CACHED); 1592 if (data&TLB_V) 1593 n++; 1594 } 1595 if (!n) { 1596 /* Free the damn thing */ 1597 stxa((paddr_t)(u_long)&pdir[k], ASI_PHYS_CACHED, 0); 1598 pmap_free_page((paddr_t)ptbl, pm); 1599 } 1600 } 1601 } 1602 if (!m) { 1603 /* Free the damn thing */ 1604 stxa((paddr_t)(u_long)&pm->pm_segs[i], ASI_PHYS_CACHED, 0); 1605 pmap_free_page((paddr_t)pdir, pm); 1606 } 1607 } 1608 } 1609 splx(s); 1610 #endif 1611 } 1612 1613 void 1614 pmap_zero_page(struct vm_page *pg) 1615 { 1616 pmap_zero_phys(VM_PAGE_TO_PHYS(pg)); 1617 } 1618 1619 void 1620 pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg) 1621 { 1622 paddr_t src = VM_PAGE_TO_PHYS(srcpg); 1623 paddr_t dst = VM_PAGE_TO_PHYS(dstpg); 1624 1625 pmap_copy_phys(src, dst); 1626 } 1627 1628 /* 1629 * Activate the address space for the specified process. If the 1630 * process is the current process, load the new MMU context. 1631 */ 1632 void 1633 pmap_activate(struct proc *p) 1634 { 1635 struct pmap *pmap = p->p_vmspace->vm_map.pmap; 1636 int s; 1637 1638 /* 1639 * This is essentially the same thing that happens in cpu_switch() 1640 * when the newly selected process is about to run, except that we 1641 * have to make sure to clean the register windows before we set 1642 * the new context. 1643 */ 1644 1645 s = splvm(); 1646 if (p == curproc) { 1647 write_user_windows(); 1648 if (pmap->pm_ctx == 0) 1649 ctx_alloc(pmap); 1650 if (CPU_ISSUN4V) 1651 stxa(CTX_SECONDARY, ASI_MMU_CONTEXTID, pmap->pm_ctx); 1652 else 1653 stxa(CTX_SECONDARY, ASI_DMMU, pmap->pm_ctx); 1654 } 1655 splx(s); 1656 } 1657 1658 /* 1659 * Deactivate the address space of the specified process. 1660 */ 1661 void 1662 pmap_deactivate(struct proc *p) 1663 { 1664 } 1665 1666 /* 1667 * pmap_kenter_pa: [ INTERFACE ] 1668 * 1669 * Enter a va -> pa mapping into the kernel pmap without any 1670 * physical->virtual tracking. 1671 * 1672 * Note: no locking is necessary in this function. 1673 */ 1674 void 1675 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot) 1676 { 1677 pte_t tte; 1678 struct pmap *pm = pmap_kernel(); 1679 int s; 1680 1681 KDASSERT(va < INTSTACK || va > EINTSTACK); 1682 KDASSERT(va < kdata || va > ekdata); 1683 1684 #ifdef DIAGNOSTIC 1685 if (pa & (PMAP_NVC|PMAP_NC|PMAP_LITTLE)) 1686 panic("pmap_kenter_pa: illegal cache flags %ld", pa); 1687 #endif 1688 1689 /* 1690 * Construct the TTE. 1691 */ 1692 s = splvm(); 1693 tte.tag = TSB_TAG(0,pm->pm_ctx,va); 1694 if (CPU_ISSUN4V) { 1695 tte.data = SUN4V_TSB_DATA(0, PGSZ_8K, pa, 1 /* Privileged */, 1696 (PROT_WRITE & prot), 1, 0, 1, 0); 1697 /* 1698 * We don't track modification on kenter mappings. 1699 */ 1700 if (prot & PROT_WRITE) 1701 tte.data |= SUN4V_TLB_REAL_W|SUN4V_TLB_W; 1702 if (prot & PROT_EXEC) 1703 tte.data |= SUN4V_TLB_EXEC; 1704 tte.data |= SUN4V_TLB_TSB_LOCK; /* wired */ 1705 } else { 1706 tte.data = SUN4U_TSB_DATA(0, PGSZ_8K, pa, 1 /* Privileged */, 1707 (PROT_WRITE & prot), 1, 0, 1, 0); 1708 /* 1709 * We don't track modification on kenter mappings. 
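 *
 * Because of that, both SUN4U_TLB_REAL_W and SUN4U_TLB_W are set up
 * front for writable mappings, so no protection fault is ever taken
 * just to record a modification (contrast pmap_enter(), which sets
 * only the REAL_W bit).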
1710 */ 1711 if (prot & PROT_WRITE) 1712 tte.data |= SUN4U_TLB_REAL_W|SUN4U_TLB_W; 1713 if (prot & PROT_EXEC) 1714 tte.data |= SUN4U_TLB_EXEC; 1715 tte.data |= SUN4U_TLB_TSB_LOCK; /* wired */ 1716 } 1717 KDASSERT((tte.data & TLB_NFO) == 0); 1718 1719 /* Kernel page tables are pre-allocated. */ 1720 if (pseg_set(pmap_kernel(), va, tte.data, 0) != 0) 1721 panic("pmap_kenter_pa: no pseg"); 1722 1723 pmap_kernel()->pm_stats.resident_count++; 1724 1725 splx(s); 1726 /* this is correct */ 1727 dcache_flush_page(pa); 1728 } 1729 1730 /* 1731 * pmap_kremove: [ INTERFACE ] 1732 * 1733 * Remove a mapping entered with pmap_kenter_pa() starting at va, 1734 * for size bytes (assumed to be page rounded). 1735 */ 1736 void 1737 pmap_kremove(vaddr_t va, vsize_t size) 1738 { 1739 struct pmap *pm = pmap_kernel(); 1740 int64_t data; 1741 int s; 1742 1743 KDASSERT(va < INTSTACK || va > EINTSTACK); 1744 KDASSERT(va < kdata || va > ekdata); 1745 1746 s = splvm(); 1747 while (size >= NBPG) { 1748 /* 1749 * Is this part of the permanent 4MB mapping? 1750 */ 1751 #ifdef DIAGNOSTIC 1752 if (pm == pmap_kernel() && 1753 (va >= ktext && va < roundup(ekdata, 4*MEG))) 1754 panic("pmap_kremove: va=%08x in locked TLB", 1755 (u_int)va); 1756 #endif 1757 /* Shouldn't need to do this if the entry's not valid. */ 1758 if ((data = pseg_get(pm, va))) { 1759 /* We need to flip the valid bit and clear the access statistics. */ 1760 if (pseg_set(pm, va, 0, 0)) { 1761 printf("pmap_kremove: gotten pseg empty!\n"); 1762 Debugger(); 1763 /* panic? */ 1764 } 1765 1766 pmap_kernel()->pm_stats.resident_count--; 1767 tsb_invalidate(pm->pm_ctx, va); 1768 /* Here we assume nothing can get into the TLB unless it has a PTE */ 1769 tlb_flush_pte(va, pm->pm_ctx); 1770 } 1771 va += NBPG; 1772 size -= NBPG; 1773 } 1774 splx(s); 1775 } 1776 1777 /* 1778 * Insert physical page at pa into the given pmap at virtual address va. 1779 * Supports 64-bit pa so we can map I/O space. 1780 */ 1781 int 1782 pmap_enter(struct pmap *pm, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags) 1783 { 1784 pte_t tte; 1785 paddr_t pg; 1786 int aliased = 0; 1787 pv_entry_t pv = NULL; 1788 int size = 0; /* PMAP_SZ_TO_TTE(pa); */ 1789 boolean_t wired = (flags & PMAP_WIRED) != 0; 1790 1791 /* 1792 * Is this part of the permanent mappings? 1793 */ 1794 KDASSERT(pm != pmap_kernel() || va < INTSTACK || va > EINTSTACK); 1795 KDASSERT(pm != pmap_kernel() || va < kdata || va > ekdata); 1796 1797 /* 1798 * XXXX If a mapping at this address already exists, remove it. 1799 */ 1800 mtx_enter(&pm->pm_mtx); 1801 tte.data = pseg_get(pm, va); 1802 if (tte.data & TLB_V) { 1803 mtx_leave(&pm->pm_mtx); 1804 pmap_remove(pm, va, va + NBPG-1); 1805 mtx_enter(&pm->pm_mtx); 1806 tte.data = pseg_get(pm, va); 1807 } 1808 1809 /* 1810 * Construct the TTE. 
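 *
 * Roughly: note any cache aliasing recorded in the pv entry
 * (PV_ALIAS/PV_NVC), fold the ref/mod bits of any previous mapping into
 * pv_va with pmap_tte2flags(), then build a sun4u or sun4v TTE that has
 * only the REAL_W bit set for writable mappings, so the first write
 * faults and latches the W and MODIFY bits.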
1811 */ 1812 pv = pa_to_pvh(pa); 1813 if (pv != NULL) { 1814 struct vm_page *pg = PHYS_TO_VM_PAGE(pa); 1815 1816 mtx_enter(&pg->mdpage.pvmtx); 1817 aliased = (pv->pv_va & (PV_ALIAS|PV_NVC)); 1818 #ifdef DIAGNOSTIC 1819 if ((flags & PROT_MASK) & ~prot) 1820 panic("pmap_enter: access_type exceeds prot"); 1821 #endif 1822 /* If we don't have the traphandler do it, set the ref/mod bits now */ 1823 if (flags & PROT_MASK) 1824 pv->pv_va |= PV_REF; 1825 if (flags & PROT_WRITE) 1826 pv->pv_va |= PV_MOD; 1827 pv->pv_va |= pmap_tte2flags(tte.data); 1828 mtx_leave(&pg->mdpage.pvmtx); 1829 } else { 1830 aliased = 0; 1831 } 1832 if (pa & PMAP_NVC) 1833 aliased = 1; 1834 #ifdef NO_VCACHE 1835 aliased = 1; /* Disable D$ */ 1836 #endif 1837 if (CPU_ISSUN4V) { 1838 tte.data = SUN4V_TSB_DATA(0, size, pa, pm == pmap_kernel(), 1839 (flags & PROT_WRITE), (!(pa & PMAP_NC)), 1840 aliased, 1, (pa & PMAP_LITTLE)); 1841 if (prot & PROT_WRITE) 1842 tte.data |= SUN4V_TLB_REAL_W; 1843 if (prot & PROT_EXEC) 1844 tte.data |= SUN4V_TLB_EXEC; 1845 if (wired) 1846 tte.data |= SUN4V_TLB_TSB_LOCK; 1847 } else { 1848 tte.data = SUN4U_TSB_DATA(0, size, pa, pm == pmap_kernel(), 1849 (flags & PROT_WRITE), (!(pa & PMAP_NC)), 1850 aliased, 1, (pa & PMAP_LITTLE)); 1851 if (prot & PROT_WRITE) 1852 tte.data |= SUN4U_TLB_REAL_W; 1853 if (prot & PROT_EXEC) 1854 tte.data |= SUN4U_TLB_EXEC; 1855 if (wired) 1856 tte.data |= SUN4U_TLB_TSB_LOCK; 1857 } 1858 KDASSERT((tte.data & TLB_NFO) == 0); 1859 1860 pg = 0; 1861 while (pseg_set(pm, va, tte.data, pg) == 1) { 1862 pg = 0; 1863 if (!pmap_get_page(&pg, NULL, pm)) { 1864 if ((flags & PMAP_CANFAIL) == 0) 1865 panic("pmap_enter: no memory"); 1866 mtx_leave(&pm->pm_mtx); 1867 return (ENOMEM); 1868 } 1869 } 1870 1871 if (pv) 1872 pmap_enter_pv(pm, va, pa); 1873 pm->pm_stats.resident_count++; 1874 mtx_leave(&pm->pm_mtx); 1875 if (pm->pm_ctx || pm == pmap_kernel()) { 1876 tsb_invalidate(pm->pm_ctx, va); 1877 1878 /* Force reload -- protections may be changed */ 1879 tlb_flush_pte(va, pm->pm_ctx); 1880 } 1881 /* this is correct */ 1882 dcache_flush_page(pa); 1883 1884 /* We will let the fast mmu miss interrupt load the new translation */ 1885 return 0; 1886 } 1887 1888 /* 1889 * Remove the given range of mapping entries. 1890 */ 1891 void 1892 pmap_remove(struct pmap *pm, vaddr_t va, vaddr_t endva) 1893 { 1894 int flush = 0; 1895 int64_t data; 1896 vaddr_t flushva = va; 1897 1898 /* 1899 * In here we should check each pseg and if there are no more entries, 1900 * free it. It's just that linear scans of 8K pages gets expensive. 1901 */ 1902 1903 KDASSERT(pm != pmap_kernel() || endva < INTSTACK || va > EINTSTACK); 1904 KDASSERT(pm != pmap_kernel() || endva < kdata || va > ekdata); 1905 1906 mtx_enter(&pm->pm_mtx); 1907 1908 /* Now do the real work */ 1909 while (va < endva) { 1910 /* 1911 * Is this part of the permanent 4MB mapping? 1912 */ 1913 #ifdef DIAGNOSTIC 1914 if (pm == pmap_kernel() && va >= ktext && 1915 va < roundup(ekdata, 4*MEG)) 1916 panic("pmap_remove: va=%08x in locked TLB", (u_int)va); 1917 #endif 1918 /* We don't really need to do this if the valid bit is not set... */ 1919 if ((data = pseg_get(pm, va)) && (data & TLB_V) != 0) { 1920 paddr_t entry; 1921 pv_entry_t pv; 1922 1923 flush |= 1; 1924 /* First remove it from the pv_table */ 1925 entry = (data & TLB_PA_MASK); 1926 pv = pa_to_pvh(entry); 1927 if (pv != NULL) 1928 pmap_remove_pv(pm, va, entry); 1929 /* We need to flip the valid bit and clear the access statistics. 
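 *
 * The teardown order below is: clear the TTE with pseg_set(pm, va, 0, 0),
 * drop the resident count, and then, only if the pmap actually has a
 * context, invalidate the TSB entry and flush the TLB entry, since a
 * pmap without a context cannot have anything cached.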
*/ 1930 if (pseg_set(pm, va, 0, 0)) { 1931 printf("pmap_remove: gotten pseg empty!\n"); 1932 Debugger(); 1933 /* panic? */ 1934 } 1935 pm->pm_stats.resident_count--; 1936 if (!pm->pm_ctx && pm != pmap_kernel()) 1937 continue; 1938 tsb_invalidate(pm->pm_ctx, va); 1939 /* Here we assume nothing can get into the TLB unless it has a PTE */ 1940 tlb_flush_pte(va, pm->pm_ctx); 1941 } 1942 va += NBPG; 1943 } 1944 mtx_leave(&pm->pm_mtx); 1945 if (flush) { 1946 cache_flush_virt(flushva, endva - flushva); 1947 } 1948 } 1949 1950 /* 1951 * Change the protection on the specified range of this pmap. 1952 */ 1953 void 1954 pmap_protect(struct pmap *pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot) 1955 { 1956 paddr_t pa; 1957 pv_entry_t pv; 1958 int64_t data; 1959 1960 KDASSERT(pm != pmap_kernel() || eva < INTSTACK || sva > EINTSTACK); 1961 KDASSERT(pm != pmap_kernel() || eva < kdata || sva > ekdata); 1962 1963 if ((prot & (PROT_WRITE | PROT_EXEC)) == 1964 (PROT_WRITE | PROT_EXEC)) 1965 return; 1966 1967 if (prot == PROT_NONE) { 1968 pmap_remove(pm, sva, eva); 1969 return; 1970 } 1971 1972 mtx_enter(&pm->pm_mtx); 1973 sva = sva & ~PGOFSET; 1974 while (sva < eva) { 1975 /* 1976 * Is this part of the permanent 4MB mapping? 1977 */ 1978 if (pm == pmap_kernel() && sva >= ktext && 1979 sva < roundup(ekdata, 4*MEG)) { 1980 prom_printf("pmap_protect: va=%08x in locked TLB\r\n", sva); 1981 OF_enter(); 1982 mtx_leave(&pm->pm_mtx); 1983 return; 1984 } 1985 1986 if (((data = pseg_get(pm, sva))&TLB_V) /*&& ((data&TLB_TSB_LOCK) == 0)*/) { 1987 pa = data & TLB_PA_MASK; 1988 pv = pa_to_pvh(pa); 1989 if (pv != NULL) { 1990 struct vm_page *pg = PHYS_TO_VM_PAGE(pa); 1991 1992 /* Save REF/MOD info */ 1993 mtx_enter(&pg->mdpage.pvmtx); 1994 pv->pv_va |= pmap_tte2flags(data); 1995 mtx_leave(&pg->mdpage.pvmtx); 1996 } 1997 /* Just do the pmap and TSB, not the pv_list */ 1998 if (CPU_ISSUN4V) { 1999 if ((prot & PROT_WRITE) == 0) 2000 data &= ~(SUN4V_TLB_W|SUN4V_TLB_REAL_W); 2001 if ((prot & PROT_EXEC) == 0) 2002 data &= ~(SUN4V_TLB_EXEC); 2003 } else { 2004 if ((prot & PROT_WRITE) == 0) 2005 data &= ~(SUN4U_TLB_W|SUN4U_TLB_REAL_W); 2006 if ((prot & PROT_EXEC) == 0) 2007 data &= ~(SUN4U_TLB_EXEC); 2008 } 2009 KDASSERT((data & TLB_NFO) == 0); 2010 if (pseg_set(pm, sva, data, 0)) { 2011 printf("pmap_protect: gotten pseg empty!\n"); 2012 Debugger(); 2013 /* panic? */ 2014 } 2015 2016 if (!pm->pm_ctx && pm != pmap_kernel()) 2017 continue; 2018 tsb_invalidate(pm->pm_ctx, sva); 2019 tlb_flush_pte(sva, pm->pm_ctx); 2020 } 2021 sva += NBPG; 2022 } 2023 mtx_leave(&pm->pm_mtx); 2024 } 2025 2026 /* 2027 * Extract the physical page address associated 2028 * with the given map/virtual_address pair. 2029 */ 2030 boolean_t 2031 pmap_extract(struct pmap *pm, vaddr_t va, paddr_t *pap) 2032 { 2033 paddr_t pa; 2034 2035 if (pm == pmap_kernel() && va >= kdata && 2036 va < roundup(ekdata, 4*MEG)) { 2037 /* Need to deal w/locked TLB entry specially. */ 2038 pa = (paddr_t) (kdatap - kdata + va); 2039 } else if( pm == pmap_kernel() && va >= ktext && va < ektext ) { 2040 /* Need to deal w/locked TLB entry specially. 
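		 * Kernel text is covered by the locked 4MB TLB entries and is
		 * not in the page tables, so translate it by its fixed offset
		 * from ktextp instead of calling pseg_get().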
*/ 2041 pa = (paddr_t) (ktextp - ktext + va); 2042 } else if (pm == pmap_kernel() && va >= INTSTACK && va < EINTSTACK) { 2043 pa = curcpu()->ci_paddr + va - INTSTACK; 2044 } else { 2045 int s; 2046 2047 s = splvm(); 2048 pa = (pseg_get(pm, va) & TLB_PA_MASK) + (va & PGOFSET); 2049 splx(s); 2050 } 2051 if (pa == 0) 2052 return (FALSE); 2053 if (pap != NULL) 2054 *pap = pa; 2055 return (TRUE); 2056 } 2057 2058 /* 2059 * Return the number bytes that pmap_dumpmmu() will dump. 2060 */ 2061 int 2062 pmap_dumpsize(void) 2063 { 2064 int sz; 2065 2066 sz = ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t)); 2067 sz += memsize * sizeof(phys_ram_seg_t); 2068 2069 return btodb(sz + DEV_BSIZE - 1); 2070 } 2071 2072 /* 2073 * Write the mmu contents to the dump device. 2074 * This gets appended to the end of a crash dump since 2075 * there is no in-core copy of kernel memory mappings on a 4/4c machine. 2076 * 2077 * Write the core dump headers and MD data to the dump device. 2078 * We dump the following items: 2079 * 2080 * kcore_seg_t MI header defined in <sys/kcore.h>) 2081 * cpu_kcore_hdr_t MD header defined in <machine/kcore.h>) 2082 * phys_ram_seg_t[memsize] physical memory segments 2083 */ 2084 int 2085 pmap_dumpmmu(int (*dump)(dev_t, daddr_t, caddr_t, size_t), daddr_t blkno) 2086 { 2087 kcore_seg_t *kseg; 2088 cpu_kcore_hdr_t *kcpu; 2089 phys_ram_seg_t memseg; 2090 register int error = 0; 2091 register int i, memsegoffset; 2092 int buffer[dbtob(1) / sizeof(int)]; 2093 int *bp, *ep; 2094 2095 #define EXPEDITE(p,n) do { \ 2096 int *sp = (int *)(p); \ 2097 int sz = (n); \ 2098 while (sz > 0) { \ 2099 *bp++ = *sp++; \ 2100 if (bp >= ep) { \ 2101 error = (*dump)(dumpdev, blkno, \ 2102 (caddr_t)buffer, dbtob(1)); \ 2103 if (error != 0) \ 2104 return (error); \ 2105 ++blkno; \ 2106 bp = buffer; \ 2107 } \ 2108 sz -= 4; \ 2109 } \ 2110 } while (0) 2111 2112 /* Setup bookkeeping pointers */ 2113 bp = buffer; 2114 ep = &buffer[sizeof(buffer) / sizeof(buffer[0])]; 2115 2116 /* Fill in MI segment header */ 2117 kseg = (kcore_seg_t *)bp; 2118 CORE_SETMAGIC(*kseg, KCORE_MAGIC, MID_MACHINE, CORE_CPU); 2119 kseg->c_size = dbtob(pmap_dumpsize()) - ALIGN(sizeof(kcore_seg_t)); 2120 2121 /* Fill in MD segment header (interpreted by MD part of libkvm) */ 2122 kcpu = (cpu_kcore_hdr_t *)((long)bp + ALIGN(sizeof(kcore_seg_t))); 2123 kcpu->cputype = CPU_SUN4U; 2124 kcpu->kernbase = (u_int64_t)KERNBASE; 2125 kcpu->cpubase = (u_int64_t)CPUINFO_VA; 2126 2127 /* Describe the locked text segment */ 2128 kcpu->ktextbase = (u_int64_t)ktext; 2129 kcpu->ktextp = (u_int64_t)ktextp; 2130 kcpu->ktextsz = (u_int64_t)(roundup(ektextp, 4*MEG) - ktextp); 2131 2132 /* Describe locked data segment */ 2133 kcpu->kdatabase = (u_int64_t)kdata; 2134 kcpu->kdatap = (u_int64_t)kdatap; 2135 kcpu->kdatasz = (u_int64_t)(roundup(ekdatap, 4*MEG) - kdatap); 2136 2137 /* Now the memsegs */ 2138 kcpu->nmemseg = memsize; 2139 kcpu->memsegoffset = memsegoffset = ALIGN(sizeof(cpu_kcore_hdr_t)); 2140 2141 /* Now we need to point this at our kernel pmap. */ 2142 kcpu->nsegmap = STSZ; 2143 kcpu->segmapoffset = (u_int64_t)pmap_kernel()->pm_physaddr; 2144 2145 /* Note: we have assumed everything fits in buffer[] so far... 
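	 * i.e. ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t))
	 * must not exceed dbtob(1), because both headers were laid down
	 * directly in buffer[] before any EXPEDITE() flush.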
*/ 2146 bp = (int *)((long)kcpu + ALIGN(sizeof(cpu_kcore_hdr_t))); 2147 2148 for (i = 0; i < memsize; i++) { 2149 memseg.start = mem[i].start; 2150 memseg.size = mem[i].size; 2151 EXPEDITE(&memseg, sizeof(phys_ram_seg_t)); 2152 } 2153 2154 if (bp != buffer) 2155 error = (*dump)(dumpdev, blkno++, (caddr_t)buffer, dbtob(1)); 2156 2157 return (error); 2158 } 2159 2160 /* 2161 * Determine (non)existence of physical page 2162 */ 2163 int pmap_pa_exists(paddr_t pa) 2164 { 2165 struct mem_region *mp; 2166 2167 /* Just go through physical memory list & see if we're there */ 2168 for (mp = mem; mp->size && mp->start <= pa; mp++) 2169 if (mp->start <= pa && mp->start + mp->size >= pa) 2170 return 1; 2171 return 0; 2172 } 2173 2174 /* 2175 * Lookup the appropriate TSB entry. 2176 * 2177 * Here is the full official pseudo code: 2178 * 2179 */ 2180 2181 #ifdef NOTYET 2182 int64 GenerateTSBPointer( 2183 int64 va, /* Missing VA */ 2184 PointerType type, /* 8K_POINTER or 16K_POINTER */ 2185 int64 TSBBase, /* TSB Register[63:13] << 13 */ 2186 Boolean split, /* TSB Register[12] */ 2187 int TSBSize) /* TSB Register[2:0] */ 2188 { 2189 int64 vaPortion; 2190 int64 TSBBaseMask; 2191 int64 splitMask; 2192 2193 /* TSBBaseMask marks the bits from TSB Base Reg */ 2194 TSBBaseMask = 0xffffffffffffe000 << 2195 (split? (TSBsize + 1) : TSBsize); 2196 2197 /* Shift va towards lsb appropriately and */ 2198 /* zero out the original va page offset */ 2199 vaPortion = (va >> ((type == 8K_POINTER)? 9: 12)) & 2200 0xfffffffffffffff0; 2201 2202 if (split) { 2203 /* There's only one bit in question for split */ 2204 splitMask = 1 << (13 + TSBsize); 2205 if (type == 8K_POINTER) 2206 /* Make sure we're in the lower half */ 2207 vaPortion &= ~splitMask; 2208 else 2209 /* Make sure we're in the upper half */ 2210 vaPortion |= splitMask; 2211 } 2212 return (TSBBase & TSBBaseMask) | (vaPortion & ~TSBBaseMask); 2213 } 2214 #endif 2215 /* 2216 * Of course, since we are not using a split TSB or variable page sizes, 2217 * we can optimize this a bit. 2218 * 2219 * The following only works for a unified 8K TSB. It will find the slot 2220 * for that particular va and return it. IT MAY BE FOR ANOTHER MAPPING! 2221 */ 2222 int 2223 ptelookup_va(vaddr_t va) 2224 { 2225 long tsbptr; 2226 #define TSBBASEMASK (0xffffffffffffe000LL<<tsbsize) 2227 2228 tsbptr = (((va >> 9) & 0xfffffffffffffff0LL) & ~TSBBASEMASK ); 2229 return (tsbptr/sizeof(pte_t)); 2230 } 2231 2232 /* 2233 * Do whatever is needed to sync the MOD/REF flags 2234 */ 2235 2236 boolean_t 2237 pmap_clear_modify(struct vm_page *pg) 2238 { 2239 paddr_t pa = VM_PAGE_TO_PHYS(pg); 2240 int changed = 0; 2241 pv_entry_t pv; 2242 2243 /* Clear all mappings */ 2244 mtx_enter(&pg->mdpage.pvmtx); 2245 pv = pa_to_pvh(pa); 2246 if (pv->pv_va & PV_MOD) 2247 changed |= 1; 2248 pv->pv_va &= ~(PV_MOD); 2249 if (pv->pv_pmap != NULL) { 2250 for (; pv; pv = pv->pv_next) { 2251 int64_t data; 2252 2253 /* First clear the mod bit in the PTE and make it R/O */ 2254 data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK); 2255 2256 /* Need to both clear the modify and write bits */ 2257 if (CPU_ISSUN4V) { 2258 if (data & (SUN4V_TLB_MODIFY)) 2259 changed |= 1; 2260 data &= ~(SUN4V_TLB_MODIFY|SUN4V_TLB_W); 2261 } else { 2262 if (data & (SUN4U_TLB_MODIFY)) 2263 changed |= 1; 2264 data &= ~(SUN4U_TLB_MODIFY|SUN4U_TLB_W); 2265 } 2266 KDASSERT((data & TLB_NFO) == 0); 2267 if (pseg_set(pv->pv_pmap, pv->pv_va & PV_VAMASK, data, 0)) { 2268 printf("pmap_clear_modify: gotten pseg empty!\n"); 2269 Debugger(); 2270 /* panic? 
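				 * As with the other pseg_set() failure paths
				 * in this file this is only reported; the
				 * stale TSB/TLB entry is still dropped right
				 * below.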
*/ 2271 } 2272 if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) { 2273 tsb_invalidate(pv->pv_pmap->pm_ctx, 2274 (pv->pv_va & PV_VAMASK)); 2275 tlb_flush_pte((pv->pv_va & PV_VAMASK), 2276 pv->pv_pmap->pm_ctx); 2277 } 2278 /* Then clear the mod bit in the pv */ 2279 if (pv->pv_va & PV_MOD) 2280 changed |= 1; 2281 pv->pv_va &= ~(PV_MOD); 2282 dcache_flush_page(pa); 2283 } 2284 } 2285 mtx_leave(&pg->mdpage.pvmtx); 2286 2287 return (changed); 2288 } 2289 2290 boolean_t 2291 pmap_clear_reference(struct vm_page *pg) 2292 { 2293 paddr_t pa = VM_PAGE_TO_PHYS(pg); 2294 int changed = 0; 2295 pv_entry_t pv; 2296 2297 /* Clear all references */ 2298 mtx_enter(&pg->mdpage.pvmtx); 2299 pv = pa_to_pvh(pa); 2300 if (pv->pv_va & PV_REF) 2301 changed = 1; 2302 pv->pv_va &= ~(PV_REF); 2303 if (pv->pv_pmap != NULL) { 2304 for (; pv; pv = pv->pv_next) { 2305 int64_t data; 2306 2307 data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK); 2308 if (CPU_ISSUN4V) { 2309 if (data & SUN4V_TLB_ACCESS) 2310 changed = 1; 2311 data &= ~SUN4V_TLB_ACCESS; 2312 } else { 2313 if (data & SUN4U_TLB_ACCESS) 2314 changed = 1; 2315 data &= ~SUN4U_TLB_ACCESS; 2316 } 2317 KDASSERT((data & TLB_NFO) == 0); 2318 if (pseg_set(pv->pv_pmap, pv->pv_va & PV_VAMASK, data, 0)) { 2319 printf("pmap_clear_reference: gotten pseg empty!\n"); 2320 Debugger(); 2321 /* panic? */ 2322 } 2323 if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) { 2324 tsb_invalidate(pv->pv_pmap->pm_ctx, 2325 (pv->pv_va & PV_VAMASK)); 2326 /* 2327 tlb_flush_pte(pv->pv_va & PV_VAMASK, 2328 pv->pv_pmap->pm_ctx); 2329 */ 2330 } 2331 if (pv->pv_va & PV_REF) 2332 changed = 1; 2333 pv->pv_va &= ~(PV_REF); 2334 } 2335 } 2336 /* Stupid here will take a cache hit even on unmapped pages 8^( */ 2337 dcache_flush_page(VM_PAGE_TO_PHYS(pg)); 2338 mtx_leave(&pg->mdpage.pvmtx); 2339 2340 return (changed); 2341 } 2342 2343 boolean_t 2344 pmap_is_modified(struct vm_page *pg) 2345 { 2346 pv_entry_t pv, npv; 2347 int mod = 0; 2348 2349 /* Check if any mapping has been modified */ 2350 mtx_enter(&pg->mdpage.pvmtx); 2351 pv = &pg->mdpage.pvent; 2352 if (pv->pv_va & PV_MOD) 2353 mod = 1; 2354 if (!mod && (pv->pv_pmap != NULL)) { 2355 for (npv = pv; mod == 0 && npv && npv->pv_pmap; npv = npv->pv_next) { 2356 int64_t data; 2357 2358 data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK); 2359 if (pmap_tte2flags(data) & PV_MOD) 2360 mod = 1; 2361 /* Migrate modify info to head pv */ 2362 if (npv->pv_va & PV_MOD) 2363 mod = 1; 2364 npv->pv_va &= ~PV_MOD; 2365 } 2366 } 2367 /* Save modify info */ 2368 if (mod) 2369 pv->pv_va |= PV_MOD; 2370 mtx_leave(&pg->mdpage.pvmtx); 2371 2372 return (mod); 2373 } 2374 2375 boolean_t 2376 pmap_is_referenced(struct vm_page *pg) 2377 { 2378 pv_entry_t pv, npv; 2379 int ref = 0; 2380 2381 /* Check if any mapping has been referenced */ 2382 mtx_enter(&pg->mdpage.pvmtx); 2383 pv = &pg->mdpage.pvent; 2384 if (pv->pv_va & PV_REF) 2385 ref = 1; 2386 if (!ref && (pv->pv_pmap != NULL)) { 2387 for (npv = pv; npv; npv = npv->pv_next) { 2388 int64_t data; 2389 2390 data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK); 2391 if (pmap_tte2flags(data) & PV_REF) 2392 ref = 1; 2393 /* Migrate modify info to head pv */ 2394 if (npv->pv_va & PV_REF) 2395 ref = 1; 2396 npv->pv_va &= ~PV_REF; 2397 } 2398 } 2399 /* Save ref info */ 2400 if (ref) 2401 pv->pv_va |= PV_REF; 2402 mtx_leave(&pg->mdpage.pvmtx); 2403 2404 return (ref); 2405 } 2406 2407 /* 2408 * Routine: pmap_unwire 2409 * Function: Clear the wired attribute for a map/virtual-address 2410 * pair. 
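 *		Concretely, this clears the TSB_LOCK ("wired") bit in the
 *		TTE so the entry is no longer pinned in the TSB.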
2411 * In/out conditions: 2412 * The mapping must already exist in the pmap. 2413 */ 2414 void 2415 pmap_unwire(struct pmap *pmap, vaddr_t va) 2416 { 2417 int64_t data; 2418 2419 if (pmap == NULL) 2420 return; 2421 2422 /* 2423 * Is this part of the permanent 4MB mapping? 2424 */ 2425 if (pmap == pmap_kernel() && va >= ktext && 2426 va < roundup(ekdata, 4*MEG)) { 2427 prom_printf("pmap_unwire: va=%08x in locked TLB\r\n", va); 2428 OF_enter(); 2429 return; 2430 } 2431 mtx_enter(&pmap->pm_mtx); 2432 data = pseg_get(pmap, va & PV_VAMASK); 2433 2434 if (CPU_ISSUN4V) 2435 data &= ~SUN4V_TLB_TSB_LOCK; 2436 else 2437 data &= ~SUN4U_TLB_TSB_LOCK; 2438 2439 if (pseg_set(pmap, va & PV_VAMASK, data, 0)) { 2440 printf("pmap_unwire: gotten pseg empty!\n"); 2441 Debugger(); 2442 /* panic? */ 2443 } 2444 mtx_leave(&pmap->pm_mtx); 2445 } 2446 2447 /* 2448 * Lower the protection on the specified physical page. 2449 * 2450 * Never enable writing as it will break COW 2451 */ 2452 void 2453 pmap_page_protect(struct vm_page *pg, vm_prot_t prot) 2454 { 2455 paddr_t pa = VM_PAGE_TO_PHYS(pg); 2456 pv_entry_t pv; 2457 int64_t data, clear, set; 2458 2459 if (prot & PROT_WRITE) 2460 return; 2461 2462 if (prot & (PROT_READ | PROT_EXEC)) { 2463 /* copy_on_write */ 2464 2465 set = TLB_V; 2466 if (CPU_ISSUN4V) { 2467 clear = SUN4V_TLB_REAL_W|SUN4V_TLB_W; 2468 if (PROT_EXEC & prot) 2469 set |= SUN4V_TLB_EXEC; 2470 else 2471 clear |= SUN4V_TLB_EXEC; 2472 } else { 2473 clear = SUN4U_TLB_REAL_W|SUN4U_TLB_W; 2474 if (PROT_EXEC & prot) 2475 set |= SUN4U_TLB_EXEC; 2476 else 2477 clear |= SUN4U_TLB_EXEC; 2478 if (PROT_EXEC == prot) 2479 set |= SUN4U_TLB_EXEC_ONLY; 2480 } 2481 2482 pv = pa_to_pvh(pa); 2483 mtx_enter(&pg->mdpage.pvmtx); 2484 if (pv->pv_pmap != NULL) { 2485 for (; pv; pv = pv->pv_next) { 2486 data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK); 2487 2488 /* Save REF/MOD info */ 2489 pv->pv_va |= pmap_tte2flags(data); 2490 2491 data &= ~(clear); 2492 data |= (set); 2493 KDASSERT((data & TLB_NFO) == 0); 2494 if (pseg_set(pv->pv_pmap, pv->pv_va & PV_VAMASK, data, 0)) { 2495 printf("pmap_page_protect: gotten pseg empty!\n"); 2496 Debugger(); 2497 /* panic? */ 2498 } 2499 if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) { 2500 tsb_invalidate(pv->pv_pmap->pm_ctx, 2501 (pv->pv_va & PV_VAMASK)); 2502 tlb_flush_pte(pv->pv_va & PV_VAMASK, pv->pv_pmap->pm_ctx); 2503 } 2504 } 2505 } 2506 mtx_leave(&pg->mdpage.pvmtx); 2507 } else { 2508 pv_entry_t firstpv; 2509 /* remove mappings */ 2510 2511 firstpv = pa_to_pvh(pa); 2512 mtx_enter(&pg->mdpage.pvmtx); 2513 2514 /* First remove the entire list of continuation pv's*/ 2515 while ((pv = firstpv->pv_next) != NULL) { 2516 data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK); 2517 2518 /* Save REF/MOD info */ 2519 firstpv->pv_va |= pmap_tte2flags(data); 2520 2521 /* Clear mapping */ 2522 if (pseg_set(pv->pv_pmap, pv->pv_va & PV_VAMASK, 0, 0)) { 2523 printf("pmap_page_protect: gotten pseg empty!\n"); 2524 Debugger(); 2525 /* panic? 
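				 * The TTE's ref/mod bits were already folded
				 * into firstpv above, so simply dropping the
				 * mapping here loses no information.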
*/ 2526 } 2527 if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) { 2528 tsb_invalidate(pv->pv_pmap->pm_ctx, 2529 (pv->pv_va & PV_VAMASK)); 2530 tlb_flush_pte(pv->pv_va & PV_VAMASK, pv->pv_pmap->pm_ctx); 2531 } 2532 pv->pv_pmap->pm_stats.resident_count--; 2533 2534 /* free the pv */ 2535 firstpv->pv_next = pv->pv_next; 2536 mtx_leave(&pg->mdpage.pvmtx); 2537 pool_put(&pv_pool, pv); 2538 mtx_enter(&pg->mdpage.pvmtx); 2539 } 2540 2541 pv = firstpv; 2542 2543 /* Then remove the primary pv */ 2544 if (pv->pv_pmap != NULL) { 2545 data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK); 2546 2547 /* Save REF/MOD info */ 2548 pv->pv_va |= pmap_tte2flags(data); 2549 if (pseg_set(pv->pv_pmap, pv->pv_va & PV_VAMASK, 0, 0)) { 2550 printf("pmap_page_protect: gotten pseg empty!\n"); 2551 Debugger(); 2552 /* panic? */ 2553 } 2554 if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) { 2555 tsb_invalidate(pv->pv_pmap->pm_ctx, 2556 (pv->pv_va & PV_VAMASK)); 2557 tlb_flush_pte(pv->pv_va & PV_VAMASK, 2558 pv->pv_pmap->pm_ctx); 2559 } 2560 pv->pv_pmap->pm_stats.resident_count--; 2561 KASSERT(pv->pv_next == NULL); 2562 /* dump the first pv */ 2563 pv->pv_pmap = NULL; 2564 } 2565 dcache_flush_page(pa); 2566 mtx_leave(&pg->mdpage.pvmtx); 2567 } 2568 /* We should really only flush the pages we demapped. */ 2569 } 2570 2571 /* 2572 * Allocate a context. If necessary, steal one from someone else. 2573 * Changes hardware context number and loads segment map. 2574 * 2575 * This routine is only ever called from locore.s just after it has 2576 * saved away the previous process, so there are no active user windows. 2577 * 2578 * The new context is flushed from the TLB before returning. 2579 */ 2580 int 2581 ctx_alloc(struct pmap *pm) 2582 { 2583 int s, cnum; 2584 static int next = 0; 2585 2586 if (pm == pmap_kernel()) { 2587 #ifdef DIAGNOSTIC 2588 printf("ctx_alloc: kernel pmap!\n"); 2589 #endif 2590 return (0); 2591 } 2592 s = splvm(); 2593 cnum = next; 2594 do { 2595 /* 2596 * We use the last context as an "invalid" context in 2597 * TSB tags. Never allocate (or bad things will happen). 2598 */ 2599 if (cnum >= numctx - 2) 2600 cnum = 0; 2601 } while (ctxbusy[++cnum] != 0 && cnum != next); 2602 if (cnum==0) cnum++; /* Never steal ctx 0 */ 2603 if (ctxbusy[cnum]) { 2604 int i; 2605 /* We gotta steal this context */ 2606 for (i = 0; i < TSBENTS; i++) { 2607 if (TSB_TAG_CTX(tsb_dmmu[i].tag) == cnum) 2608 tsb_dmmu[i].tag = TSB_TAG_INVALID; 2609 if (TSB_TAG_CTX(tsb_immu[i].tag) == cnum) 2610 tsb_immu[i].tag = TSB_TAG_INVALID; 2611 } 2612 tlb_flush_ctx(cnum); 2613 } 2614 ctxbusy[cnum] = pm->pm_physaddr; 2615 next = cnum; 2616 splx(s); 2617 pm->pm_ctx = cnum; 2618 return cnum; 2619 } 2620 2621 /* 2622 * Give away a context. 2623 */ 2624 void 2625 ctx_free(struct pmap *pm) 2626 { 2627 int oldctx; 2628 2629 oldctx = pm->pm_ctx; 2630 2631 if (oldctx == 0) 2632 panic("ctx_free: freeing kernel context"); 2633 #ifdef DIAGNOSTIC 2634 if (ctxbusy[oldctx] == 0) 2635 printf("ctx_free: freeing free context %d\n", oldctx); 2636 if (ctxbusy[oldctx] != pm->pm_physaddr) { 2637 printf("ctx_free: freeing someone esle's context\n " 2638 "ctxbusy[%d] = %p, pm(%p)->pm_ctx = %p\n", 2639 oldctx, (void *)(u_long)ctxbusy[oldctx], pm, 2640 (void *)(u_long)pm->pm_physaddr); 2641 Debugger(); 2642 } 2643 #endif 2644 /* We should verify it has not been stolen and reallocated... */ 2645 ctxbusy[oldctx] = 0; 2646 } 2647 2648 /* 2649 * Enter the pmap and virtual address into the 2650 * physical to virtual map table. 
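 *
 * The first mapping of a page lives in the vm_page's embedded pv entry
 * (mdpage.pvent); additional mappings get pv entries from pv_pool and are
 * chained behind it.  If a new mapping is not cache-index compatible with
 * the existing ones (they differ in VA_ALIAS_MASK bits), the page is
 * flagged PV_ALIAS and all of its mappings are made uncacheable via
 * pmap_page_cache().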
2651 */ 2652 void 2653 pmap_enter_pv(struct pmap *pmap, vaddr_t va, paddr_t pa) 2654 { 2655 pv_entry_t pv, npv = NULL; 2656 struct vm_page *pg = PHYS_TO_VM_PAGE(pa); 2657 2658 pv = pa_to_pvh(pa); 2659 mtx_enter(&pg->mdpage.pvmtx); 2660 2661 retry: 2662 if (pv->pv_pmap == NULL) { 2663 /* 2664 * No entries yet, use header as the first entry 2665 */ 2666 PV_SETVA(pv, va); 2667 pv->pv_pmap = pmap; 2668 pv->pv_next = NULL; 2669 mtx_leave(&pg->mdpage.pvmtx); 2670 if (npv) 2671 pool_put(&pv_pool, npv); 2672 return; 2673 } 2674 2675 if (npv == NULL) { 2676 mtx_leave(&pg->mdpage.pvmtx); 2677 npv = pool_get(&pv_pool, PR_NOWAIT); 2678 if (npv == NULL) 2679 panic("%s: no pv entries available", __func__); 2680 mtx_enter(&pg->mdpage.pvmtx); 2681 goto retry; 2682 } 2683 2684 if (!(pv->pv_va & PV_ALIAS)) { 2685 /* 2686 * There is at least one other VA mapping this page. 2687 * Check if they are cache index compatible. If not 2688 * remove all mappings, flush the cache and set page 2689 * to be mapped uncached. Caching will be restored 2690 * when pages are mapped compatible again. 2691 * XXX - caching is not currently being restored, but 2692 * XXX - I haven't seen the pages uncached since 2693 * XXX - using pmap_prefer(). mhitch 2694 */ 2695 if ((pv->pv_va ^ va) & VA_ALIAS_MASK) { 2696 pv->pv_va |= PV_ALIAS; 2697 pmap_page_cache(pmap, pa, 0); 2698 } 2699 } 2700 2701 /* 2702 * There is at least one other VA mapping this page. 2703 * Place this entry after the header. 2704 */ 2705 npv->pv_va = va & PV_VAMASK; 2706 npv->pv_pmap = pmap; 2707 npv->pv_next = pv->pv_next; 2708 pv->pv_next = npv; 2709 2710 mtx_leave(&pg->mdpage.pvmtx); 2711 } 2712 2713 /* 2714 * Remove a physical to virtual address translation. 2715 */ 2716 void 2717 pmap_remove_pv(struct pmap *pmap, vaddr_t va, paddr_t pa) 2718 { 2719 pv_entry_t pv, opv, npv = NULL; 2720 struct vm_page *pg = PHYS_TO_VM_PAGE(pa); 2721 int64_t data = 0LL; 2722 2723 opv = pv = pa_to_pvh(pa); 2724 mtx_enter(&pg->mdpage.pvmtx); 2725 2726 /* 2727 * If it is the first entry on the list, it is actually 2728 * in the header and we must copy the following entry up 2729 * to the header. Otherwise we must search the list for 2730 * the entry. In either case we free the now unused entry. 2731 */ 2732 if (pmap == pv->pv_pmap && PV_MATCH(pv, va)) { 2733 /* Save modified/ref bits */ 2734 data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK); 2735 npv = pv->pv_next; 2736 if (npv) { 2737 /* First save mod/ref bits */ 2738 pv->pv_va = (pv->pv_va & PV_MASK) | npv->pv_va; 2739 pv->pv_next = npv->pv_next; 2740 pv->pv_pmap = npv->pv_pmap; 2741 } else { 2742 pv->pv_pmap = NULL; 2743 pv->pv_next = NULL; 2744 pv->pv_va &= (PV_REF|PV_MOD); /* Only save ref/mod bits */ 2745 } 2746 } else { 2747 for (npv = pv->pv_next; npv; pv = npv, npv = npv->pv_next) { 2748 if (pmap == npv->pv_pmap && PV_MATCH(npv, va)) 2749 goto found; 2750 } 2751 2752 /* 2753 * Sometimes UVM gets confused and calls pmap_remove() instead 2754 * of pmap_kremove() 2755 */ 2756 mtx_leave(&pg->mdpage.pvmtx); 2757 return; 2758 found: 2759 pv->pv_next = npv->pv_next; 2760 2761 /* 2762 * move any referenced/modified info to the base pv 2763 */ 2764 data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK); 2765 2766 /* 2767 * Here, if this page was aliased, we should try clear out any 2768 * alias that may have occurred. However, that's a complicated 2769 * operation involving multiple scans of the pv list. 
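		 * The check below only clears PV_ALIAS (and turns caching
		 * back on) once no remaining mapping differs from the head
		 * entry in the VA_ALIAS_MASK bits.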
2770 */ 2771 } 2772 2773 /* Save REF/MOD info */ 2774 opv->pv_va |= pmap_tte2flags(data); 2775 2776 /* Check to see if the alias went away */ 2777 if (opv->pv_va & PV_ALIAS) { 2778 opv->pv_va &= ~PV_ALIAS; 2779 for (pv = opv; pv; pv = pv->pv_next) { 2780 if ((pv->pv_va ^ opv->pv_va) & VA_ALIAS_MASK) { 2781 opv->pv_va |= PV_ALIAS; 2782 } 2783 } 2784 if (!(opv->pv_va & PV_ALIAS)) 2785 pmap_page_cache(pmap, pa, 1); 2786 } 2787 2788 mtx_leave(&pg->mdpage.pvmtx); 2789 2790 if (npv) 2791 pool_put(&pv_pool, npv); 2792 } 2793 2794 /* 2795 * pmap_page_cache: 2796 * 2797 * Change all mappings of a page to cached/uncached. 2798 */ 2799 void 2800 pmap_page_cache(struct pmap *pm, paddr_t pa, int mode) 2801 { 2802 pv_entry_t pv; 2803 struct vm_page *pg = PHYS_TO_VM_PAGE(pa); 2804 2805 if (CPU_ISSUN4US || CPU_ISSUN4V) 2806 return; 2807 2808 pv = &pg->mdpage.pvent; 2809 if (pv == NULL) 2810 return; 2811 2812 MUTEX_ASSERT_LOCKED(&pg->mdpage.pvmtx); 2813 2814 while (pv) { 2815 vaddr_t va; 2816 2817 va = (pv->pv_va & PV_VAMASK); 2818 if (pv->pv_va & PV_NC) { 2819 /* Non-cached -- I/O mapping */ 2820 if (pseg_set(pv->pv_pmap, va, 2821 pseg_get(pv->pv_pmap, va) & ~(SUN4U_TLB_CV|SUN4U_TLB_CP), 2822 0)) { 2823 printf("pmap_page_cache: aliased pseg empty!\n"); 2824 Debugger(); 2825 /* panic? */ 2826 } 2827 } else if (mode && (!(pv->pv_va & PV_NVC))) { 2828 /* Enable caching */ 2829 if (pseg_set(pv->pv_pmap, va, 2830 pseg_get(pv->pv_pmap, va) | SUN4U_TLB_CV, 0)) { 2831 printf("pmap_page_cache: aliased pseg empty!\n"); 2832 Debugger(); 2833 /* panic? */ 2834 } 2835 } else { 2836 /* Disable caching */ 2837 if (pseg_set(pv->pv_pmap, va, 2838 pseg_get(pv->pv_pmap, va) & ~SUN4U_TLB_CV, 0)) { 2839 printf("pmap_page_cache: aliased pseg empty!\n"); 2840 Debugger(); 2841 /* panic? */ 2842 } 2843 } 2844 if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) { 2845 tsb_invalidate(pv->pv_pmap->pm_ctx, va); 2846 /* Force reload -- protections may be changed */ 2847 tlb_flush_pte(va, pv->pv_pmap->pm_ctx); 2848 } 2849 2850 pv = pv->pv_next; 2851 } 2852 } 2853 2854 int 2855 pmap_get_page(paddr_t *pa, const char *wait, struct pmap *pm) 2856 { 2857 int reserve = pm == pmap_kernel() ? UVM_PGA_USERESERVE : 0; 2858 2859 if (uvm.page_init_done) { 2860 struct vm_page *pg; 2861 2862 while ((pg = uvm_pagealloc(NULL, 0, NULL, 2863 UVM_PGA_ZERO|reserve)) == NULL) { 2864 if (wait == NULL) 2865 return 0; 2866 uvm_wait(wait); 2867 } 2868 pg->wire_count++; 2869 atomic_clearbits_int(&pg->pg_flags, PG_BUSY); 2870 *pa = VM_PAGE_TO_PHYS(pg); 2871 } else { 2872 uvm_page_physget(pa); 2873 pmap_zero_phys(*pa); 2874 } 2875 2876 return (1); 2877 } 2878 2879 void 2880 pmap_free_page(paddr_t pa, struct pmap *pm) 2881 { 2882 struct vm_page *pg = PHYS_TO_VM_PAGE(pa); 2883 2884 pg->wire_count = 0; 2885 uvm_pagefree(pg); 2886 } 2887 2888 void 2889 pmap_remove_holes(struct vmspace *vm) 2890 { 2891 vaddr_t shole, ehole; 2892 struct vm_map *map = &vm->vm_map; 2893 2894 /* 2895 * Although the hardware only supports 44-bit virtual addresses 2896 * (and thus a hole from 1 << 43 to -1 << 43), this pmap 2897 * implementation itself only supports 43-bit virtual addresses, 2898 * so we have to narrow the hole a bit more. 
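	 * The range [1L << (HOLESHIFT - 1), -1L << (HOLESHIFT - 1)) is
	 * therefore reserved below with a PROT_NONE, UVM_FLAG_HOLE map
	 * entry so nothing can ever be mapped inside it.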
2899 */ 2900 shole = 1L << (HOLESHIFT - 1); 2901 ehole = -1L << (HOLESHIFT - 1); 2902 2903 shole = ulmax(vm_map_min(map), shole); 2904 ehole = ulmin(vm_map_max(map), ehole); 2905 2906 if (ehole <= shole) 2907 return; 2908 2909 (void)uvm_map(map, &shole, ehole - shole, NULL, UVM_UNKNOWN_OFFSET, 0, 2910 UVM_MAPFLAG(PROT_NONE, PROT_NONE, MAP_INHERIT_SHARE, MADV_RANDOM, 2911 UVM_FLAG_NOMERGE | UVM_FLAG_HOLE | UVM_FLAG_FIXED)); 2912 } 2913 2914 #ifdef DDB 2915 2916 void 2917 db_dump_pv(db_expr_t addr, int have_addr, db_expr_t count, char *modif) 2918 { 2919 struct pv_entry *pv; 2920 2921 if (!have_addr) { 2922 db_printf("Need addr for pv\n"); 2923 return; 2924 } 2925 2926 for (pv = pa_to_pvh(addr); pv; pv = pv->pv_next) 2927 db_printf("pv@%p: next=%p pmap=%p va=0x%llx\n", 2928 pv, pv->pv_next, pv->pv_pmap, 2929 (unsigned long long)pv->pv_va); 2930 2931 } 2932 2933 #endif 2934
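
/*
 * Illustrative sketch (kept under #if 0, not compiled): a worked example
 * of the TSB indexing performed by ptelookup_va() above, assuming the
 * unified TSB with 8K pages and 16-byte pte_t entries that this pmap
 * uses.  The function name and the example addresses are illustrative
 * only.
 */
#if 0
int
example_tsb_slot(vaddr_t va)
{
	long tsbptr;

	/* Same computation as ptelookup_va() with tsbsize == 0 (512 entries). */
	tsbptr = ((va >> 9) & 0xfffffffffffffff0LL) & 0x1fffLL;

	/*
	 * E.g. va 0x2000 lands in slot 1, and so does va 0x402000
	 * (0x2000 + 512 * 8192): different VAs share slots, which is why
	 * a TSB hit must still match the stored tag (and why ctx_alloc()
	 * writes TSB_TAG_INVALID when it steals a context).
	 */
	return (tsbptr / sizeof(pte_t));
}
#endif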