/*
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and Ralph Campbell.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)pmap.c	8.3 (Berkeley) 01/11/94
 */

/*
 *	Manages physical address maps.
 *
 *	In addition to hardware address maps, this
 *	module is called upon to provide software-use-only
 *	maps which may or may not be stored in the same
 *	form as hardware maps.  These pseudo-maps are
 *	used to store intermediate results from copy
 *	operations to and from address spaces.
 *
 *	Since the information managed by this module is
 *	also stored by the logical address mapping module,
 *	this module may throw away valid virtual-to-physical
 *	mappings at almost any time.  However, invalidations
 *	of virtual-to-physical mappings must be done as
 *	requested.
 *
 *	In order to cope with hardware architectures which
 *	make virtual-to-physical map invalidates expensive,
 *	this module may delay invalidate or reduce-protection
 *	operations until such time as they are actually
 *	necessary.  This module is given full information as
 *	to which processors are currently using which maps,
 *	and as to when physical maps must be made correct.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/user.h>
#include <sys/buf.h>

#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>

#include <machine/machConst.h>
#include <machine/pte.h>

extern vm_page_t vm_page_alloc1 __P((void));
extern void vm_page_free1 __P((vm_page_t));

/*
 * For each vm_page_t, there is a list of all currently valid virtual
 * mappings of that page.  An entry is a pv_entry_t, the list is pv_table.
 * XXX really should do this as a part of the higher level code.
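 *
 * This reverse (physical-to-virtual) table is what lets operations such
 * as pmap_page_protect() and pmap_remove_pv() find every mapping of a
 * physical page: pa_to_pvh(pa) yields the list head for the page and
 * the pv_next chain enumerates each (pmap, va) pair that maps it.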
 */
typedef struct pv_entry {
	struct pv_entry	*pv_next;	/* next pv_entry */
	struct pmap	*pv_pmap;	/* pmap where mapping lies */
	vm_offset_t	pv_va;		/* virtual address for mapping */
} *pv_entry_t;

pv_entry_t	pv_table;	/* array of entries, one per page */
extern void	pmap_remove_pv();

#define pa_index(pa)		atop((pa) - first_phys_addr)
#define pa_to_pvh(pa)		(&pv_table[pa_index(pa)])

#ifdef DEBUG
struct {
	int kernel;	/* entering kernel mapping */
	int user;	/* entering user mapping */
	int ptpneeded;	/* needed to allocate a PT page */
	int pwchange;	/* no mapping change, just wiring or protection */
	int wchange;	/* no mapping change, just wiring */
	int mchange;	/* was mapped but mapping to different page */
	int managed;	/* a managed page */
	int firstpv;	/* first mapping for this PA */
	int secondpv;	/* second mapping for this PA */
	int ci;		/* cache inhibited */
	int unmanaged;	/* not a managed page */
	int flushes;	/* cache flushes */
	int cachehit;	/* new entry forced valid entry out */
} enter_stats;
struct {
	int calls;
	int removes;
	int flushes;
	int pidflushes;	/* HW pid stolen */
	int pvfirst;
	int pvsearch;
} remove_stats;

int pmapdebug;
#define PDB_FOLLOW	0x0001
#define PDB_INIT	0x0002
#define PDB_ENTER	0x0004
#define PDB_REMOVE	0x0008
#define PDB_CREATE	0x0010
#define PDB_PTPAGE	0x0020
#define PDB_PVENTRY	0x0040
#define PDB_BITS	0x0080
#define PDB_COLLECT	0x0100
#define PDB_PROTECT	0x0200
#define PDB_TLBPID	0x0400
#define PDB_PARANOIA	0x2000
#define PDB_WIRING	0x4000
#define PDB_PVDUMP	0x8000

#endif /* DEBUG */

struct pmap	kernel_pmap_store;

vm_offset_t	avail_start;	/* PA of first available physical page */
vm_offset_t	avail_end;	/* PA of last available physical page */
vm_size_t	mem_size;	/* memory size in bytes */
vm_offset_t	virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t	virtual_end;	/* VA of last avail page (end of kernel AS) */
int		pmaxpagesperpage; /* PAGE_SIZE / NBPG */
#ifdef ATTR
char		*pmap_attributes; /* reference and modify bits */
#endif
struct segtab	*free_segtab;	/* free list kept locally */
u_int		tlbpid_gen = 1;	/* TLB PID generation count */
int		tlbpid_cnt = 2;	/* next available TLB PID */
pt_entry_t	*Sysmap;	/* kernel pte table */
u_int		Sysmapsize;	/* number of pte's in Sysmap */

/*
 * Bootstrap the system enough to run with virtual memory.
 * firstaddr is the first unused kseg0 address (not page aligned).
 */
void
pmap_bootstrap(firstaddr)
	vm_offset_t firstaddr;
{
	register int i;
	vm_offset_t start = firstaddr;
	extern int maxmem, physmem;

#define	valloc(name, type, num) \
	    (name) = (type *)firstaddr; firstaddr = (vm_offset_t)((name)+(num))
	/*
	 * Allocate a PTE table for the kernel.
	 * The first '256' comes from PAGER_MAP_SIZE in vm_pager_init().
	 * This should be kept in sync.
	 * We also reserve space for kmem_alloc_pageable() for vm_fork().
	 */
	Sysmapsize = (VM_KMEM_SIZE + VM_MBUF_SIZE + VM_PHYS_SIZE +
		nbuf * MAXBSIZE + 16 * NCARGS) / NBPG + 256 + 256;
	valloc(Sysmap, pt_entry_t, Sysmapsize);
#ifdef ATTR
	valloc(pmap_attributes, char, physmem);
#endif
	/*
	 * Allocate memory for pv_table.
	 * This will allocate more entries than we really need.
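	 * (There is one entry for every physical page from firstaddr
	 * up to maxmem.)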
	 * We could do this in pmap_init when we know the actual
	 * phys_start and phys_end, but it is better to use kseg0
	 * addresses rather than kernel virtual addresses mapped
	 * through the TLB.
	 */
	i = maxmem - pmax_btop(MACH_CACHED_TO_PHYS(firstaddr));
	valloc(pv_table, struct pv_entry, i);

	/*
	 * Clear allocated memory.
	 */
	firstaddr = pmax_round_page(firstaddr);
	bzero((caddr_t)start, firstaddr - start);

	avail_start = MACH_CACHED_TO_PHYS(firstaddr);
	avail_end = pmax_ptob(maxmem);
	mem_size = avail_end - avail_start;

	virtual_avail = VM_MIN_KERNEL_ADDRESS;
	virtual_end = VM_MIN_KERNEL_ADDRESS + Sysmapsize * NBPG;
	/* XXX need to decide how to set cnt.v_page_size */
	pmaxpagesperpage = 1;

	simple_lock_init(&kernel_pmap_store.pm_lock);
	kernel_pmap_store.pm_count = 1;
}

/*
 * Bootstrap memory allocator. This function allows for early dynamic
 * memory allocation until the virtual memory system has been bootstrapped.
 * After that point, either kmem_alloc or malloc should be used. This
 * function works by stealing pages from the (to be) managed page pool,
 * stealing virtual address space, then mapping the pages and zeroing them.
 *
 * It should be used from pmap_bootstrap till vm_page_startup; afterwards
 * it cannot be used, and will generate a panic if tried. Note that this
 * memory will never be freed, and in essence it is wired down.
 */
void *
pmap_bootstrap_alloc(size)
	int size;
{
	vm_offset_t val;
	extern boolean_t vm_page_startup_initialized;

	if (vm_page_startup_initialized)
		panic("pmap_bootstrap_alloc: called after startup initialized");

	val = MACH_PHYS_TO_CACHED(avail_start);
	size = round_page(size);
	avail_start += size;

	blkclr((caddr_t)val, size);
	return ((void *)val);
}

/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 */
void
pmap_init(phys_start, phys_end)
	vm_offset_t phys_start, phys_end;
{

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_INIT))
		printf("pmap_init(%x, %x)\n", phys_start, phys_end);
#endif
}

/*
 *	Create and return a physical map.
 *
 *	If the size specified for the map
 *	is zero, the map is an actual physical
 *	map, and may be referenced by the
 *	hardware.
 *
 *	If the size specified is non-zero,
 *	the map will be used in software only, and
 *	is bounded by that size.
 */
pmap_t
pmap_create(size)
	vm_size_t size;
{
	register pmap_t pmap;

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_CREATE))
		printf("pmap_create(%x)\n", size);
#endif
	/*
	 * A software-use-only map does not need a pmap.
	 */
	if (size)
		return (NULL);

	/* XXX: is it ok to wait here? */
	pmap = (pmap_t) malloc(sizeof *pmap, M_VMPMAP, M_WAITOK);
#ifdef notifwewait
	if (pmap == NULL)
		panic("pmap_create: cannot allocate a pmap");
#endif
	bzero(pmap, sizeof(*pmap));
	pmap_pinit(pmap);
	return (pmap);
}

/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
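 *
 * Segment tables are carved out of whole pages: the first segtab in a
 * freshly allocated page satisfies the current request and the rest
 * are threaded onto free_segtab, with the first word of each free
 * segtab serving as the link to the next.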
 */
void
pmap_pinit(pmap)
	register struct pmap *pmap;
{
	register int i;
	int s;
	extern struct vmspace vmspace0;
	extern struct user *proc0paddr;

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_CREATE))
		printf("pmap_pinit(%x)\n", pmap);
#endif
	simple_lock_init(&pmap->pm_lock);
	pmap->pm_count = 1;
	if (free_segtab) {
		s = splimp();
		pmap->pm_segtab = free_segtab;
		free_segtab = *(struct segtab **)free_segtab;
		pmap->pm_segtab->seg_tab[0] = NULL;
		splx(s);
	} else {
		register struct segtab *stp;
		vm_page_t mem;

		mem = vm_page_alloc1();
		pmap_zero_page(VM_PAGE_TO_PHYS(mem));
		pmap->pm_segtab = stp = (struct segtab *)
			MACH_PHYS_TO_CACHED(VM_PAGE_TO_PHYS(mem));
		i = pmaxpagesperpage * (NBPG / sizeof(struct segtab));
		s = splimp();
		while (--i != 0) {
			stp++;
			*(struct segtab **)stp = free_segtab;
			free_segtab = stp;
		}
		splx(s);
	}
#ifdef DIAGNOSTIC
	for (i = 0; i < PMAP_SEGTABSIZE; i++)
		if (pmap->pm_segtab->seg_tab[i] != 0)
			panic("pmap_pinit: pm_segtab != 0");
#endif
	if (pmap == &vmspace0.vm_pmap) {
		/*
		 * The initial process has already been allocated a TLBPID
		 * in mach_init().
		 */
		pmap->pm_tlbpid = 1;
		pmap->pm_tlbgen = tlbpid_gen;
		proc0paddr->u_pcb.pcb_segtab = (void *)pmap->pm_segtab;
	} else {
		pmap->pm_tlbpid = 0;
		pmap->pm_tlbgen = 0;
	}
}

/*
 *	Retire the given physical map from service.
 *	Should only be called if the map contains
 *	no valid mappings.
 */
void
pmap_destroy(pmap)
	register pmap_t pmap;
{
	int count;

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_CREATE))
		printf("pmap_destroy(%x)\n", pmap);
#endif
	if (pmap == NULL)
		return;

	simple_lock(&pmap->pm_lock);
	count = --pmap->pm_count;
	simple_unlock(&pmap->pm_lock);
	if (count == 0) {
		pmap_release(pmap);
		free((caddr_t)pmap, M_VMPMAP);
	}
}

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_release(pmap)
	register pmap_t pmap;
{

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_CREATE))
		printf("pmap_release(%x)\n", pmap);
#endif

	if (pmap->pm_segtab) {
		register pt_entry_t *pte;
		register int i;
		int s;
#ifdef DIAGNOSTIC
		register int j;
#endif

		for (i = 0; i < PMAP_SEGTABSIZE; i++) {
			/* get pointer to segment map */
			pte = pmap->pm_segtab->seg_tab[i];
			if (!pte)
				continue;
#ifdef DIAGNOSTIC
			/* check the segment map before freeing its page */
			for (j = 0; j < NPTEPG; j++) {
				if (pte->pt_entry)
					panic("pmap_release: segmap not empty");
			}
#endif
			vm_page_free1(
				PHYS_TO_VM_PAGE(MACH_CACHED_TO_PHYS(pte)));
			pmap->pm_segtab->seg_tab[i] = NULL;
		}
		s = splimp();
		*(struct segtab **)pmap->pm_segtab = free_segtab;
		free_segtab = pmap->pm_segtab;
		splx(s);
		pmap->pm_segtab = NULL;
	}
}

/*
 *	Add a reference to the specified pmap.
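 *	The count is manipulated under the pmap's simple lock;
 *	pmap_destroy() frees the pmap when the count reaches zero.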
 */
void
pmap_reference(pmap)
	pmap_t pmap;
{

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_reference(%x)\n", pmap);
#endif
	if (pmap != NULL) {
		simple_lock(&pmap->pm_lock);
		pmap->pm_count++;
		simple_unlock(&pmap->pm_lock);
	}
}

/*
 *	Remove the given range of addresses from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the page size.
 */
void
pmap_remove(pmap, sva, eva)
	register pmap_t pmap;
	vm_offset_t sva, eva;
{
	register vm_offset_t nssva;
	register pt_entry_t *pte;
	unsigned entry;

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
		printf("pmap_remove(%x, %x, %x)\n", pmap, sva, eva);
	remove_stats.calls++;
#endif
	if (pmap == NULL)
		return;

	if (!pmap->pm_segtab) {
		register pt_entry_t *pte;

		/* remove entries from kernel pmap */
#ifdef DIAGNOSTIC
		if (sva < VM_MIN_KERNEL_ADDRESS || eva > virtual_end)
			panic("pmap_remove: kva not in range");
#endif
		pte = kvtopte(sva);
		for (; sva < eva; sva += NBPG, pte++) {
			entry = pte->pt_entry;
			if (!(entry & PG_V))
				continue;
			if (entry & PG_WIRED)
				pmap->pm_stats.wired_count--;
			pmap->pm_stats.resident_count--;
			pmap_remove_pv(pmap, sva, entry & PG_FRAME);
#ifdef ATTR
			pmap_attributes[atop(entry & PG_FRAME)] = 0;
#endif
			pte->pt_entry = PG_NV;
			/*
			 * Flush the TLB for the given address.
			 */
			MachTLBFlushAddr(sva);
#ifdef DEBUG
			remove_stats.flushes++;
#endif
		}
		return;
	}

#ifdef DIAGNOSTIC
	if (eva > VM_MAXUSER_ADDRESS)
		panic("pmap_remove: uva not in range");
#endif
	while (sva < eva) {
		nssva = pmax_trunc_seg(sva) + NBSEG;
		if (nssva == 0 || nssva > eva)
			nssva = eva;
		/*
		 * If VA belongs to an unallocated segment,
		 * skip to the next segment boundary.
		 */
		if (!(pte = pmap_segmap(pmap, sva))) {
			sva = nssva;
			continue;
		}
		/*
		 * Invalidate every valid mapping within this segment.
		 */
		pte += (sva >> PGSHIFT) & (NPTEPG - 1);
		for (; sva < nssva; sva += NBPG, pte++) {
			entry = pte->pt_entry;
			if (!(entry & PG_V))
				continue;
			if (entry & PG_WIRED)
				pmap->pm_stats.wired_count--;
			pmap->pm_stats.resident_count--;
			pmap_remove_pv(pmap, sva, entry & PG_FRAME);
#ifdef ATTR
			pmap_attributes[atop(entry & PG_FRAME)] = 0;
#endif
			pte->pt_entry = PG_NV;
			/*
			 * Flush the TLB for the given address.
			 */
			if (pmap->pm_tlbgen == tlbpid_gen) {
				MachTLBFlushAddr(sva | (pmap->pm_tlbpid <<
					VMMACH_TLB_PID_SHIFT));
#ifdef DEBUG
				remove_stats.flushes++;
#endif
			}
		}
	}
}

/*
 *	pmap_page_protect:
 *
 *	Lower the permission for all mappings to a given page.
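 *
 *	There are three cases: if write permission is retained, nothing
 *	needs to be done; if the page is being made read-only
 *	(copy-on-write), each mapping found in pv_table is write
 *	protected via pmap_protect(); otherwise every mapping of the
 *	page is removed.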
 */
void
pmap_page_protect(pa, prot)
	vm_offset_t pa;
	vm_prot_t prot;
{
	register pv_entry_t pv;
	register vm_offset_t va;
	int s;

#ifdef DEBUG
	if ((pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) ||
	    (prot == VM_PROT_NONE && (pmapdebug & PDB_REMOVE)))
		printf("pmap_page_protect(%x, %x)\n", pa, prot);
#endif
	if (!IS_VM_PHYSADDR(pa))
		return;

	switch (prot) {
	case VM_PROT_READ|VM_PROT_WRITE:
	case VM_PROT_ALL:
		break;

	/* copy_on_write */
	case VM_PROT_READ:
	case VM_PROT_READ|VM_PROT_EXECUTE:
		pv = pa_to_pvh(pa);
		s = splimp();
		/*
		 * Loop over all current mappings, setting/clearing as
		 * appropriate.
		 */
		if (pv->pv_pmap != NULL) {
			for (; pv; pv = pv->pv_next) {
				extern vm_offset_t pager_sva, pager_eva;

				va = pv->pv_va;

				/*
				 * XXX don't write protect pager mappings
				 */
				if (va >= pager_sva && va < pager_eva)
					continue;
				pmap_protect(pv->pv_pmap, va, va + PAGE_SIZE,
					prot);
			}
		}
		splx(s);
		break;

	/* remove_all */
	default:
		pv = pa_to_pvh(pa);
		s = splimp();
		while (pv->pv_pmap != NULL) {
			pmap_remove(pv->pv_pmap, pv->pv_va,
				    pv->pv_va + PAGE_SIZE);
		}
		splx(s);
	}
}

/*
 *	Set the physical protection on the
 *	specified range of this map as requested.
 */
void
pmap_protect(pmap, sva, eva, prot)
	register pmap_t pmap;
	vm_offset_t sva, eva;
	vm_prot_t prot;
{
	register vm_offset_t nssva;
	register pt_entry_t *pte;
	register unsigned entry;
	u_int p;

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT))
		printf("pmap_protect(%x, %x, %x, %x)\n", pmap, sva, eva, prot);
#endif
	if (pmap == NULL)
		return;

	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		pmap_remove(pmap, sva, eva);
		return;
	}

	p = (prot & VM_PROT_WRITE) ? PG_RW : PG_RO;

	if (!pmap->pm_segtab) {
		/*
		 * Change entries in kernel pmap.
		 * This will trap if the page is writeable (in order to set
		 * the dirty bit) even if the dirty bit is already set. The
		 * optimization isn't worth the effort since this code isn't
		 * executed much. The common case is to make a user page
		 * read-only.
		 */
#ifdef DIAGNOSTIC
		if (sva < VM_MIN_KERNEL_ADDRESS || eva > virtual_end)
			panic("pmap_protect: kva not in range");
#endif
		pte = kvtopte(sva);
		for (; sva < eva; sva += NBPG, pte++) {
			entry = pte->pt_entry;
			if (!(entry & PG_V))
				continue;
			entry = (entry & ~(PG_M | PG_RO)) | p;
			pte->pt_entry = entry;
			/*
			 * Update the TLB if the given address is in the cache.
			 */
			MachTLBUpdate(sva, entry);
		}
		return;
	}

#ifdef DIAGNOSTIC
	if (eva > VM_MAXUSER_ADDRESS)
		panic("pmap_protect: uva not in range");
#endif
	while (sva < eva) {
		nssva = pmax_trunc_seg(sva) + NBSEG;
		if (nssva == 0 || nssva > eva)
			nssva = eva;
		/*
		 * If VA belongs to an unallocated segment,
		 * skip to the next segment boundary.
		 */
		if (!(pte = pmap_segmap(pmap, sva))) {
			sva = nssva;
			continue;
		}
		/*
		 * Change protection on every valid mapping within this segment.
		 */
		pte += (sva >> PGSHIFT) & (NPTEPG - 1);
		for (; sva < nssva; sva += NBPG, pte++) {
			entry = pte->pt_entry;
			if (!(entry & PG_V))
				continue;
			entry = (entry & ~(PG_M | PG_RO)) | p;
			pte->pt_entry = entry;
			/*
			 * Update the TLB if the given address is in the cache.
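			 * If pm_tlbgen is stale, this pmap's TLB PID has
			 * been recycled by a full flush, so no entry for
			 * this pmap can still be resident and no update
			 * is needed.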
			 */
			if (pmap->pm_tlbgen == tlbpid_gen)
				MachTLBUpdate(sva | (pmap->pm_tlbpid <<
					VMMACH_TLB_PID_SHIFT), entry);
		}
	}
}

/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte cannot be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map NOW.
 */
void
pmap_enter(pmap, va, pa, prot, wired)
	register pmap_t pmap;
	vm_offset_t va;
	register vm_offset_t pa;
	vm_prot_t prot;
	boolean_t wired;
{
	register pt_entry_t *pte;
	register u_int npte;
	register int i, j;
	vm_page_t mem;

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
		printf("pmap_enter(%x, %x, %x, %x, %x)\n",
		       pmap, va, pa, prot, wired);
#endif
#ifdef DIAGNOSTIC
	if (!pmap)
		panic("pmap_enter: pmap");
	if (!pmap->pm_segtab) {
		enter_stats.kernel++;
		if (va < VM_MIN_KERNEL_ADDRESS || va >= virtual_end)
			panic("pmap_enter: kva");
	} else {
		enter_stats.user++;
		if (va >= VM_MAXUSER_ADDRESS)
			panic("pmap_enter: uva");
	}
	if (pa & 0x80000000)
		panic("pmap_enter: pa");
	if (!(prot & VM_PROT_READ))
		panic("pmap_enter: prot");
#endif

	if (IS_VM_PHYSADDR(pa)) {
		register pv_entry_t pv, npv;
		int s;

		if (!(prot & VM_PROT_WRITE))
			npte = PG_RO;
		else {
			register vm_page_t mem;

			mem = PHYS_TO_VM_PAGE(pa);
			if ((int)va < 0) {
				/*
				 * Don't bother to trap on kernel writes,
				 * just record page as dirty.
				 */
				npte = PG_M;
				mem->flags &= ~PG_CLEAN;
			} else
#ifdef ATTR
				if ((pmap_attributes[atop(pa)] &
				    PMAP_ATTR_MOD) || !(mem->flags & PG_CLEAN))
#else
				if (!(mem->flags & PG_CLEAN))
#endif
					npte = PG_M;
				else
					npte = 0;
		}

#ifdef DEBUG
		enter_stats.managed++;
#endif
		/*
		 * Enter the pmap and virtual address into the
		 * physical to virtual map table.
		 */
		pv = pa_to_pvh(pa);
		s = splimp();
#ifdef DEBUG
		if (pmapdebug & PDB_ENTER)
			printf("pmap_enter: pv %x: was %x/%x/%x\n",
			       pv, pv->pv_va, pv->pv_pmap, pv->pv_next);
#endif
		if (pv->pv_pmap == NULL) {
			/*
			 * No entries yet, use header as the first entry
			 */
#ifdef DEBUG
			if (pmapdebug & PDB_PVENTRY)
				printf("pmap_enter: first pv: pmap %x va %x\n",
					pmap, va);
			enter_stats.firstpv++;
#endif
			pv->pv_va = va;
			pv->pv_pmap = pmap;
			pv->pv_next = NULL;
		} else {
			/*
			 * There is at least one other VA mapping this page.
			 * Place this entry after the header.
			 *
			 * Note: the entry may already be in the table if
			 * we are only changing the protection bits.
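			 * Search the chain for a matching (pmap, va) pair;
			 * finding one means this call only changes the
			 * protection or wiring of an existing mapping, so
			 * no new pv entry is allocated.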
			 */
			for (npv = pv; npv; npv = npv->pv_next)
				if (pmap == npv->pv_pmap && va == npv->pv_va) {
#ifdef DIAGNOSTIC
					unsigned entry;

					if (!pmap->pm_segtab)
						entry = kvtopte(va)->pt_entry;
					else {
						pte = pmap_segmap(pmap, va);
						if (pte) {
							pte += (va >> PGSHIFT) &
							    (NPTEPG - 1);
							entry = pte->pt_entry;
						} else
							entry = 0;
					}
					if (!(entry & PG_V) ||
					    (entry & PG_FRAME) != pa)
						printf(
			"pmap_enter: found va %x pa %x in pv_table but != %x\n",
							va, pa, entry);
#endif
					goto fnd;
				}
#ifdef DEBUG
			if (pmapdebug & PDB_PVENTRY)
				printf("pmap_enter: new pv: pmap %x va %x\n",
					pmap, va);
#endif
			/* can this cause us to recurse forever? */
			npv = (pv_entry_t)
				malloc(sizeof *npv, M_VMPVENT, M_NOWAIT);
			npv->pv_va = va;
			npv->pv_pmap = pmap;
			npv->pv_next = pv->pv_next;
			pv->pv_next = npv;
#ifdef DEBUG
			if (!npv->pv_next)
				enter_stats.secondpv++;
#endif
		fnd:
			;
		}
		splx(s);
	} else {
		/*
		 * Assumption: if it is not part of our managed memory
		 * then it must be device memory which may be volatile.
		 */
#ifdef DEBUG
		enter_stats.unmanaged++;
#endif
		npte = (prot & VM_PROT_WRITE) ? (PG_M | PG_N) : (PG_RO | PG_N);
	}

	/*
	 * The only time we need to flush the cache is if we
	 * execute from a physical address and then change the data.
	 * This is the best place to do this.
	 * pmap_protect() and pmap_remove() are mostly used to switch
	 * between R/W and R/O pages.
	 * NOTE: we only support cache flush for read only text.
	 */
	if (prot == (VM_PROT_READ | VM_PROT_EXECUTE))
		MachFlushICache(MACH_PHYS_TO_CACHED(pa), PAGE_SIZE);

	if (!pmap->pm_segtab) {
		/* enter entries into kernel pmap */
		pte = kvtopte(va);
		npte |= pa | PG_V | PG_G;
		if (wired) {
			pmap->pm_stats.wired_count += pmaxpagesperpage;
			npte |= PG_WIRED;
		}
		i = pmaxpagesperpage;
		do {
			if (!(pte->pt_entry & PG_V)) {
				pmap->pm_stats.resident_count++;
			} else {
#ifdef DIAGNOSTIC
				if (pte->pt_entry & PG_WIRED)
					panic("pmap_enter: kernel wired");
#endif
			}
			/*
			 * Update the same virtual address entry.
			 */
			MachTLBUpdate(va, npte);
			pte->pt_entry = npte;
			va += NBPG;
			npte += NBPG;
			pte++;
		} while (--i != 0);
		return;
	}

	if (!(pte = pmap_segmap(pmap, va))) {
		mem = vm_page_alloc1();
		pmap_zero_page(VM_PAGE_TO_PHYS(mem));
		pmap_segmap(pmap, va) = pte = (pt_entry_t *)
			MACH_PHYS_TO_CACHED(VM_PAGE_TO_PHYS(mem));
	}
	pte += (va >> PGSHIFT) & (NPTEPG - 1);

	/*
	 * Now validate mapping with desired protection/wiring.
	 * Assume uniform modified and referenced status for all
	 * PMAX pages in a MACH page.
	 */
	npte |= pa | PG_V;
	if (wired) {
		pmap->pm_stats.wired_count += pmaxpagesperpage;
		npte |= PG_WIRED;
	}
#ifdef DEBUG
	if (pmapdebug & PDB_ENTER) {
		printf("pmap_enter: new pte %x", npte);
		if (pmap->pm_tlbgen == tlbpid_gen)
			printf(" tlbpid %d", pmap->pm_tlbpid);
		printf("\n");
	}
#endif
	i = pmaxpagesperpage;
	do {
		pte->pt_entry = npte;
		if (pmap->pm_tlbgen == tlbpid_gen)
			MachTLBUpdate(va | (pmap->pm_tlbpid <<
				VMMACH_TLB_PID_SHIFT), npte);
		va += NBPG;
		npte += NBPG;
		pte++;
	} while (--i != 0);
}

/*
 *	Routine:	pmap_change_wiring
 *	Function:	Change the wiring attribute for a map/virtual-address
 *			pair.
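 *			Wiring is a software-only attribute (PG_WIRED),
 *			so changing it requires no TLB flush.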
 *	In/out conditions:
 *			The mapping must already exist in the pmap.
 */
void
pmap_change_wiring(pmap, va, wired)
	register pmap_t pmap;
	vm_offset_t va;
	boolean_t wired;
{
	register pt_entry_t *pte;
	u_int p;
	register int i;

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_WIRING))
		printf("pmap_change_wiring(%x, %x, %x)\n", pmap, va, wired);
#endif
	if (pmap == NULL)
		return;

	p = wired ? PG_WIRED : 0;

	/*
	 * Don't need to flush the TLB since PG_WIRED is only in software.
	 */
	if (!pmap->pm_segtab) {
		/* change entries in kernel pmap */
#ifdef DIAGNOSTIC
		if (va < VM_MIN_KERNEL_ADDRESS || va >= virtual_end)
			panic("pmap_change_wiring");
#endif
		pte = kvtopte(va);
	} else {
		if (!(pte = pmap_segmap(pmap, va)))
			return;
		pte += (va >> PGSHIFT) & (NPTEPG - 1);
	}

	i = pmaxpagesperpage;
	if (!(pte->pt_entry & PG_WIRED) && p)
		pmap->pm_stats.wired_count += i;
	else if ((pte->pt_entry & PG_WIRED) && !p)
		pmap->pm_stats.wired_count -= i;
	do {
		if (pte->pt_entry & PG_V)
			pte->pt_entry = (pte->pt_entry & ~PG_WIRED) | p;
		pte++;
	} while (--i != 0);
}

/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 */
vm_offset_t
pmap_extract(pmap, va)
	register pmap_t pmap;
	vm_offset_t va;
{
	register vm_offset_t pa;

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_extract(%x, %x) -> ", pmap, va);
#endif

	if (!pmap->pm_segtab) {
#ifdef DIAGNOSTIC
		if (va < VM_MIN_KERNEL_ADDRESS || va >= virtual_end)
			panic("pmap_extract");
#endif
		pa = kvtopte(va)->pt_entry & PG_FRAME;
	} else {
		register pt_entry_t *pte;

		if (!(pte = pmap_segmap(pmap, va)))
			pa = 0;
		else {
			pte += (va >> PGSHIFT) & (NPTEPG - 1);
			pa = pte->pt_entry & PG_FRAME;
		}
	}
	if (pa)
		pa |= va & PGOFSET;

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_extract: pa %x\n", pa);
#endif
	return (pa);
}

/*
 *	Copy the range specified by src_addr/len
 *	from the source map to the range dst_addr/len
 *	in the destination map.
 *
 *	This routine is only advisory and need not do anything.
 */
void
pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
	pmap_t dst_pmap;
	pmap_t src_pmap;
	vm_offset_t dst_addr;
	vm_size_t len;
	vm_offset_t src_addr;
{

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_copy(%x, %x, %x, %x, %x)\n",
		       dst_pmap, src_pmap, dst_addr, len, src_addr);
#endif
}

/*
 *	Require that all active physical maps contain no
 *	incorrect entries NOW.  [This update includes
 *	forcing updates of any address map caching.]
 *
 *	Generally used to ensure that a thread about
 *	to run will see a semantically correct world.
 */
void
pmap_update()
{

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_update()\n");
#endif
}

/*
 *	Routine:	pmap_collect
 *	Function:
 *		Garbage collects the physical map system for
 *		pages which are no longer used.
 *		Success need not be guaranteed -- that is, there
 *		may well be pages which are not referenced, but
 *		others may be collected.
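 *		(This implementation currently collects nothing;
 *		page table pages are reclaimed only when a pmap
 *		is released.)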
 *	Usage:
 *		Called by the pageout daemon when pages are scarce.
 */
void
pmap_collect(pmap)
	pmap_t pmap;
{

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_collect(%x)\n", pmap);
#endif
}

/*
 *	pmap_zero_page zeros the specified (machine independent)
 *	page.
 */
void
pmap_zero_page(phys)
	vm_offset_t phys;
{
	register int *p, *end;

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_zero_page(%x)\n", phys);
#endif
	p = (int *)MACH_PHYS_TO_CACHED(phys);
	end = p + PAGE_SIZE / sizeof(int);
	do {
		p[0] = 0;
		p[1] = 0;
		p[2] = 0;
		p[3] = 0;
		p += 4;
	} while (p != end);
}

/*
 *	pmap_copy_page copies the specified (machine independent)
 *	page.
 */
void
pmap_copy_page(src, dst)
	vm_offset_t src, dst;
{
	register int *s, *d, *end;
	register int tmp0, tmp1, tmp2, tmp3;

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_copy_page(%x, %x)\n", src, dst);
#endif
	s = (int *)MACH_PHYS_TO_CACHED(src);
	d = (int *)MACH_PHYS_TO_CACHED(dst);
	end = s + PAGE_SIZE / sizeof(int);
	do {
		tmp0 = s[0];
		tmp1 = s[1];
		tmp2 = s[2];
		tmp3 = s[3];
		d[0] = tmp0;
		d[1] = tmp1;
		d[2] = tmp2;
		d[3] = tmp3;
		s += 4;
		d += 4;
	} while (s != end);
}

/*
 *	Routine:	pmap_pageable
 *	Function:
 *		Make the specified pages (by pmap, offset)
 *		pageable (or not) as requested.
 *
 *		A page which is not pageable may not take
 *		a fault; therefore, its page table entry
 *		must remain valid for the duration.
 *
 *		This routine is merely advisory; pmap_enter
 *		will specify that these pages are to be wired
 *		down (or not) as appropriate.
 */
void
pmap_pageable(pmap, sva, eva, pageable)
	pmap_t pmap;
	vm_offset_t sva, eva;
	boolean_t pageable;
{

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_pageable(%x, %x, %x, %x)\n",
		       pmap, sva, eva, pageable);
#endif
}

/*
 *	Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(pa)
	vm_offset_t pa;
{

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_clear_modify(%x)\n", pa);
#endif
#ifdef ATTR
	pmap_attributes[atop(pa)] &= ~PMAP_ATTR_MOD;
#endif
}

/*
 *	pmap_clear_reference:
 *
 *	Clear the reference bit on the specified physical page.
 */
void
pmap_clear_reference(pa)
	vm_offset_t pa;
{

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_clear_reference(%x)\n", pa);
#endif
#ifdef ATTR
	pmap_attributes[atop(pa)] &= ~PMAP_ATTR_REF;
#endif
}

/*
 *	pmap_is_referenced:
 *
 *	Return whether or not the specified physical page is referenced
 *	by any physical maps.
 */
boolean_t
pmap_is_referenced(pa)
	vm_offset_t pa;
{
#ifdef ATTR
	return (pmap_attributes[atop(pa)] & PMAP_ATTR_REF);
#else
	return (FALSE);
#endif
}

/*
 *	pmap_is_modified:
 *
 *	Return whether or not the specified physical page is modified
 *	by any physical maps.
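 *
 *	Without the ATTR option the pmap module records no modify
 *	information of its own (pmap_enter tracks dirty state through
 *	the vm_page PG_CLEAN flag instead), so this returns FALSE.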
 */
boolean_t
pmap_is_modified(pa)
	vm_offset_t pa;
{
#ifdef ATTR
	return (pmap_attributes[atop(pa)] & PMAP_ATTR_MOD);
#else
	return (FALSE);
#endif
}

vm_offset_t
pmap_phys_address(ppn)
	int ppn;
{

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_phys_address(%x)\n", ppn);
#endif
	return (pmax_ptob(ppn));
}

/*
 * Miscellaneous support routines
 */

/*
 * Allocate a hardware PID and return it.
 * It takes almost as much or more time to search the TLB for a
 * specific PID and flush those entries as it does to flush the entire TLB.
 * Therefore, when we allocate a new PID, we just take the next number. When
 * we run out of numbers, we flush the TLB, increment the generation count
 * and start over. PID zero is reserved for kernel use.
 * This is called only by switch().
 */
int
pmap_alloc_tlbpid(p)
	register struct proc *p;
{
	register pmap_t pmap;
	register int id;

	pmap = &p->p_vmspace->vm_pmap;
	if (pmap->pm_tlbgen != tlbpid_gen) {
		id = tlbpid_cnt;
		if (id == VMMACH_NUM_PIDS) {
			MachTLBFlush();
			/* reserve tlbpid_gen == 0 to always mean invalid */
			if (++tlbpid_gen == 0)
				tlbpid_gen = 1;
			id = 1;
		}
		tlbpid_cnt = id + 1;
		pmap->pm_tlbpid = id;
		pmap->pm_tlbgen = tlbpid_gen;
	} else
		id = pmap->pm_tlbpid;

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_TLBPID)) {
		if (curproc)
			printf("pmap_alloc_tlbpid: curproc %d '%s' ",
				curproc->p_pid, curproc->p_comm);
		else
			printf("pmap_alloc_tlbpid: curproc <none> ");
		printf("segtab %x tlbpid %d pid %d '%s'\n",
			pmap->pm_segtab, id, p->p_pid, p->p_comm);
	}
#endif
	return (id);
}

/*
 * Remove a physical to virtual address translation.
 */
void
pmap_remove_pv(pmap, va, pa)
	pmap_t pmap;
	vm_offset_t va, pa;
{
	register pv_entry_t pv, npv;
	int s;

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_PVENTRY))
		printf("pmap_remove_pv(%x, %x, %x)\n", pmap, va, pa);
#endif
	/*
	 * Remove page from the PV table (raise IPL since we
	 * may be called at interrupt time).
	 */
	if (!IS_VM_PHYSADDR(pa))
		return;
	pv = pa_to_pvh(pa);
	s = splimp();
	/*
	 * If it is the first entry on the list, it is actually
	 * in the header and we must copy the following entry up
	 * to the header.  Otherwise we must search the list for
	 * the entry.  In either case we free the now unused entry.
	 */
	if (pmap == pv->pv_pmap && va == pv->pv_va) {
		npv = pv->pv_next;
		if (npv) {
			*pv = *npv;
			free((caddr_t)npv, M_VMPVENT);
		} else
			pv->pv_pmap = NULL;
#ifdef DEBUG
		remove_stats.pvfirst++;
#endif
	} else {
		for (npv = pv->pv_next; npv; pv = npv, npv = npv->pv_next) {
#ifdef DEBUG
			remove_stats.pvsearch++;
#endif
			if (pmap == npv->pv_pmap && va == npv->pv_va)
				goto fnd;
		}
#ifdef DIAGNOSTIC
		printf("pmap_remove_pv(%x, %x, %x) not found\n", pmap, va, pa);
		panic("pmap_remove_pv");
#endif
	fnd:
		pv->pv_next = npv->pv_next;
		free((caddr_t)npv, M_VMPVENT);
	}
	splx(s);
}

/*
 * vm_page_alloc1:
 *
 *	Allocate and return a memory cell with no associated object.
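 *	This is a local variant of vm_page_alloc(); it is used here for
 *	segment table and page table pages, which are accessed through
 *	their kseg0 addresses rather than through a VM object.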
 */
vm_page_t
vm_page_alloc1()
{
	register vm_page_t mem;
	int spl;

	spl = splimp();				/* XXX */
	simple_lock(&vm_page_queue_free_lock);
	if (vm_page_queue_free.tqh_first == NULL) {
		simple_unlock(&vm_page_queue_free_lock);
		splx(spl);
		return (NULL);
	}

	mem = vm_page_queue_free.tqh_first;
	TAILQ_REMOVE(&vm_page_queue_free, mem, pageq);

	cnt.v_free_count--;
	simple_unlock(&vm_page_queue_free_lock);
	splx(spl);

	mem->flags = PG_BUSY | PG_CLEAN | PG_FAKE;
	mem->wire_count = 0;

	/*
	 * Decide if we should poke the pageout daemon.
	 * We do this if the free count is less than the low
	 * water mark, or if the free count is less than the high
	 * water mark (but above the low water mark) and the inactive
	 * count is less than its target.
	 *
	 * We don't have the counts locked ... if they change a little,
	 * it doesn't really matter.
	 */

	if (cnt.v_free_count < cnt.v_free_min ||
	    (cnt.v_free_count < cnt.v_free_target &&
	     cnt.v_inactive_count < cnt.v_inactive_target))
		thread_wakeup((int)&vm_pages_needed);
	return (mem);
}

/*
 * vm_page_free1:
 *
 *	Returns the given page to the free list,
 *	disassociating it from any VM object.
 *
 *	Object and page must be locked prior to entry.
 */
void
vm_page_free1(mem)
	register vm_page_t mem;
{

	if (mem->flags & PG_ACTIVE) {
		TAILQ_REMOVE(&vm_page_queue_active, mem, pageq);
		mem->flags &= ~PG_ACTIVE;
		cnt.v_active_count--;
	}

	if (mem->flags & PG_INACTIVE) {
		TAILQ_REMOVE(&vm_page_queue_inactive, mem, pageq);
		mem->flags &= ~PG_INACTIVE;
		cnt.v_inactive_count--;
	}

	if (!(mem->flags & PG_FICTITIOUS)) {
		int spl;

		spl = splimp();
		simple_lock(&vm_page_queue_free_lock);
		TAILQ_INSERT_TAIL(&vm_page_queue_free, mem, pageq);

		cnt.v_free_count++;
		simple_unlock(&vm_page_queue_free_lock);
		splx(spl);
	}
}