/*
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and Ralph Campbell.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)pmap.c	8.4 (Berkeley) 01/26/94
 */

/*
 *	Manages physical address maps.
 *
 *	In addition to hardware address maps, this
 *	module is called upon to provide software-use-only
 *	maps which may or may not be stored in the same
 *	form as hardware maps.  These pseudo-maps are
 *	used to store intermediate results from copy
 *	operations to and from address spaces.
 *
 *	Since the information managed by this module is
 *	also stored by the logical address mapping module,
 *	this module may throw away valid virtual-to-physical
 *	mappings at almost any time.  However, invalidations
 *	of virtual-to-physical mappings must be done as
 *	requested.
 *
 *	In order to cope with hardware architectures which
 *	make virtual-to-physical map invalidates expensive,
 *	this module may delay invalidate or reduce-protection
 *	operations until such time as they are actually
 *	necessary.  This module is given full information as
 *	to which processors are currently using which maps,
 *	and of when physical maps must be made correct.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/user.h>
#include <sys/buf.h>
#ifdef SYSVSHM
#include <sys/shm.h>
#endif

#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>

#include <machine/machConst.h>
#include <machine/pte.h>

extern vm_page_t vm_page_alloc1 __P((void));
extern void vm_page_free1 __P((vm_page_t));

/*
 * For each vm_page_t, there is a list of all currently valid virtual
 * mappings of that page.  An entry is a pv_entry_t, the list is pv_table.
 * XXX really should do this as a part of the higher level code.
 */
typedef struct pv_entry {
	struct pv_entry	*pv_next;	/* next pv_entry */
	struct pmap	*pv_pmap;	/* pmap where mapping lies */
	vm_offset_t	pv_va;		/* virtual address for mapping */
} *pv_entry_t;

pv_entry_t	pv_table;	/* array of entries, one per page */
extern void pmap_remove_pv();

#define pa_index(pa)		atop((pa) - first_phys_addr)
#define pa_to_pvh(pa)		(&pv_table[pa_index(pa)])

#ifdef DEBUG
struct {
	int kernel;	/* entering kernel mapping */
	int user;	/* entering user mapping */
	int ptpneeded;	/* needed to allocate a PT page */
	int pwchange;	/* no mapping change, just wiring or protection */
	int wchange;	/* no mapping change, just wiring */
	int mchange;	/* was mapped but mapping to different page */
	int managed;	/* a managed page */
	int firstpv;	/* first mapping for this PA */
	int secondpv;	/* second mapping for this PA */
	int ci;		/* cache inhibited */
	int unmanaged;	/* not a managed page */
	int flushes;	/* cache flushes */
	int cachehit;	/* new entry forced valid entry out */
} enter_stats;
struct {
	int calls;
	int removes;
	int flushes;
	int pidflushes;	/* HW pid stolen */
	int pvfirst;
	int pvsearch;
} remove_stats;

int pmapdebug;
#define PDB_FOLLOW	0x0001
#define PDB_INIT	0x0002
#define PDB_ENTER	0x0004
#define PDB_REMOVE	0x0008
#define PDB_CREATE	0x0010
#define PDB_PTPAGE	0x0020
#define PDB_PVENTRY	0x0040
#define PDB_BITS	0x0080
#define PDB_COLLECT	0x0100
#define PDB_PROTECT	0x0200
#define PDB_TLBPID	0x0400
#define PDB_PARANOIA	0x2000
#define PDB_WIRING	0x4000
#define PDB_PVDUMP	0x8000

#endif /* DEBUG */

struct pmap	kernel_pmap_store;

vm_offset_t	avail_start;	/* PA of first available physical page */
vm_offset_t	avail_end;	/* PA of last available physical page */
vm_size_t	mem_size;	/* memory size in bytes */
vm_offset_t	virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t	virtual_end;	/* VA of last avail page (end of kernel AS) */
int		pmaxpagesperpage;	/* PAGE_SIZE / NBPG */
#ifdef ATTR
char		*pmap_attributes;	/* reference and modify bits */
#endif
struct segtab	*free_segtab;		/* free list kept locally */
u_int		tlbpid_gen = 1;		/* TLB PID generation count */
int		tlbpid_cnt = 2;		/* next available TLB PID */
pt_entry_t	*Sysmap;		/* kernel pte table */
u_int		Sysmapsize;		/* number of pte's in Sysmap */

/*
 * Bootstrap the system enough to run with virtual memory.
 * firstaddr is the first unused kseg0 address (not page aligned).
 */
void
pmap_bootstrap(firstaddr)
	vm_offset_t firstaddr;
{
	register int i;
	vm_offset_t start = firstaddr;
	extern int maxmem, physmem;

#define	valloc(name, type, num) \
	    (name) = (type *)firstaddr; firstaddr = (vm_offset_t)((name)+(num))
	/*
	 * Allocate a PTE table for the kernel.
	 * The '1024' comes from PAGER_MAP_SIZE in vm_pager_init().
	 * This should be kept in sync.
	 * We also reserve space for kmem_alloc_pageable() for vm_fork().
	 */
	Sysmapsize = (VM_KMEM_SIZE + VM_MBUF_SIZE + VM_PHYS_SIZE +
		nbuf * MAXBSIZE + 16 * NCARGS) / NBPG + 1024 + 256;
#ifdef SYSVSHM
	Sysmapsize += shminfo.shmall;
#endif
	valloc(Sysmap, pt_entry_t, Sysmapsize);
#ifdef ATTR
	valloc(pmap_attributes, char, physmem);
#endif
	/*
	 * Allocate memory for pv_table.
	 * This will allocate more entries than we really need.
	 * We could do this in pmap_init when we know the actual
	 * phys_start and phys_end, but it's better to use kseg0 addresses
	 * rather than kernel virtual addresses mapped through the TLB.
	 */
	i = maxmem - pmax_btop(MACH_CACHED_TO_PHYS(firstaddr));
	valloc(pv_table, struct pv_entry, i);

	/*
	 * Clear allocated memory.
	 */
	firstaddr = pmax_round_page(firstaddr);
	bzero((caddr_t)start, firstaddr - start);

	avail_start = MACH_CACHED_TO_PHYS(firstaddr);
	avail_end = pmax_ptob(maxmem);
	mem_size = avail_end - avail_start;

	virtual_avail = VM_MIN_KERNEL_ADDRESS;
	virtual_end = VM_MIN_KERNEL_ADDRESS + Sysmapsize * NBPG;
	/* XXX need to decide how to set cnt.v_page_size */
	pmaxpagesperpage = 1;

	simple_lock_init(&kernel_pmap_store.pm_lock);
	kernel_pmap_store.pm_count = 1;
}

/*
 * Bootstrap memory allocator. This function allows for early dynamic
 * memory allocation until the virtual memory system has been bootstrapped.
 * After that point, either kmem_alloc or malloc should be used. This
 * function works by stealing pages from the (to be) managed page pool,
 * stealing virtual address space, then mapping the pages and zeroing them.
 *
 * It should be used from pmap_bootstrap till vm_page_startup; afterwards
 * it cannot be used, and will generate a panic if tried.  Note that this
 * memory will never be freed, and in essence it is wired down.
 */
void *
pmap_bootstrap_alloc(size)
	int size;
{
	vm_offset_t val;
	extern boolean_t vm_page_startup_initialized;

	if (vm_page_startup_initialized)
		panic("pmap_bootstrap_alloc: called after startup initialized");

	val = MACH_PHYS_TO_CACHED(avail_start);
	size = round_page(size);
	avail_start += size;

	blkclr((caddr_t)val, size);
	return ((void *)val);
}

/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 */
void
pmap_init(phys_start, phys_end)
	vm_offset_t phys_start, phys_end;
{

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_INIT))
		printf("pmap_init(%x, %x)\n", phys_start, phys_end);
#endif
}

/*
 *	Create and return a physical map.
 *
 *	If the size specified for the map
 *	is zero, the map is an actual physical
 *	map, and may be referenced by the
 *	hardware.
 *
 *	If the size specified is non-zero,
 *	the map will be used in software only, and
 *	is bounded by that size.
 */
pmap_t
pmap_create(size)
	vm_size_t size;
{
	register pmap_t pmap;

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_CREATE))
		printf("pmap_create(%x)\n", size);
#endif
	/*
	 * A software use-only map does not need a pmap.
	 */
	if (size)
		return (NULL);

	/* XXX: is it ok to wait here? */
	pmap = (pmap_t) malloc(sizeof *pmap, M_VMPMAP, M_WAITOK);
#ifdef notifwewait
	if (pmap == NULL)
		panic("pmap_create: cannot allocate a pmap");
#endif
	bzero(pmap, sizeof(*pmap));
	pmap_pinit(pmap);
	return (pmap);
}

/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
void
pmap_pinit(pmap)
	register struct pmap *pmap;
{
	register int i;
	int s;
	extern struct vmspace vmspace0;
	extern struct user *proc0paddr;

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_CREATE))
		printf("pmap_pinit(%x)\n", pmap);
#endif
	simple_lock_init(&pmap->pm_lock);
	pmap->pm_count = 1;
	if (free_segtab) {
		s = splimp();
		pmap->pm_segtab = free_segtab;
		free_segtab = *(struct segtab **)free_segtab;
		pmap->pm_segtab->seg_tab[0] = NULL;
		splx(s);
	} else {
		register struct segtab *stp;
		vm_page_t mem;

		mem = vm_page_alloc1();
		pmap_zero_page(VM_PAGE_TO_PHYS(mem));
		pmap->pm_segtab = stp = (struct segtab *)
			MACH_PHYS_TO_CACHED(VM_PAGE_TO_PHYS(mem));
		i = pmaxpagesperpage * (NBPG / sizeof(struct segtab));
		s = splimp();
		while (--i != 0) {
			stp++;
			*(struct segtab **)stp = free_segtab;
			free_segtab = stp;
		}
		splx(s);
	}
#ifdef DIAGNOSTIC
	for (i = 0; i < PMAP_SEGTABSIZE; i++)
		if (pmap->pm_segtab->seg_tab[i] != 0)
			panic("pmap_pinit: pm_segtab != 0");
#endif
	if (pmap == &vmspace0.vm_pmap) {
		/*
		 * The initial process has already been allocated a TLBPID
		 * in mach_init().
		 */
		pmap->pm_tlbpid = 1;
		pmap->pm_tlbgen = tlbpid_gen;
		proc0paddr->u_pcb.pcb_segtab = (void *)pmap->pm_segtab;
	} else {
		pmap->pm_tlbpid = 0;
		pmap->pm_tlbgen = 0;
	}
}

/*
 *	Retire the given physical map from service.
 *	Should only be called if the map contains
 *	no valid mappings.
 */
void
pmap_destroy(pmap)
	register pmap_t pmap;
{
	int count;

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_CREATE))
		printf("pmap_destroy(%x)\n", pmap);
#endif
	if (pmap == NULL)
		return;

	simple_lock(&pmap->pm_lock);
	count = --pmap->pm_count;
	simple_unlock(&pmap->pm_lock);
	if (count == 0) {
		pmap_release(pmap);
		free((caddr_t)pmap, M_VMPMAP);
	}
}

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_release(pmap)
	register pmap_t pmap;
{

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_CREATE))
		printf("pmap_release(%x)\n", pmap);
#endif

	if (pmap->pm_segtab) {
		register pt_entry_t *pte;
		register int i;
		int s;
#ifdef DIAGNOSTIC
		register int j;
#endif

		for (i = 0; i < PMAP_SEGTABSIZE; i++) {
			/* get pointer to segment map */
			pte = pmap->pm_segtab->seg_tab[i];
			if (!pte)
				continue;
#ifdef DIAGNOSTIC
			/* check every PTE in the segment before freeing it */
			for (j = 0; j < NPTEPG; j++) {
				if (pte[j].pt_entry)
					panic("pmap_release: segmap not empty");
			}
#endif
			vm_page_free1(
				PHYS_TO_VM_PAGE(MACH_CACHED_TO_PHYS(pte)));
			pmap->pm_segtab->seg_tab[i] = NULL;
		}
		s = splimp();
		*(struct segtab **)pmap->pm_segtab = free_segtab;
		free_segtab = pmap->pm_segtab;
		splx(s);
		pmap->pm_segtab = NULL;
	}
}

/*
 *	Add a reference to the specified pmap.
 */
void
pmap_reference(pmap)
	pmap_t pmap;
{

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_reference(%x)\n", pmap);
#endif
	if (pmap != NULL) {
		simple_lock(&pmap->pm_lock);
		pmap->pm_count++;
		simple_unlock(&pmap->pm_lock);
	}
}

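/*
 * Overview of the address translation idiom used by the routines below:
 *
 * Kernel addresses are handled with kvtopte(va), which indexes Sysmap
 * directly; kernel entries are created with PG_G set, so they match in
 * the TLB regardless of the current hardware PID.
 *
 * User addresses go through the two-level table: pmap_segmap(pmap, va)
 * yields the PTE page for va's segment (or NULL if the segment has no
 * mappings yet), and the individual entry is then
 *
 *	pte = pmap_segmap(pmap, va) + ((va >> PGSHIFT) & (NPTEPG - 1));
 *
 * TLB operations on user mappings are tagged with the pmap's hardware
 * PID, i.e. the address passed to MachTLBFlushAddr()/MachTLBUpdate() is
 * va | (pmap->pm_tlbpid << VMMACH_TLB_PID_SHIFT), and they are issued
 * only while pmap->pm_tlbgen == tlbpid_gen (see pmap_alloc_tlbpid()).
 */
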
/*
 *	Remove the given range of addresses from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the page size.
 */
void
pmap_remove(pmap, sva, eva)
	register pmap_t pmap;
	vm_offset_t sva, eva;
{
	register vm_offset_t nssva;
	register pt_entry_t *pte;
	unsigned entry;

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
		printf("pmap_remove(%x, %x, %x)\n", pmap, sva, eva);
	remove_stats.calls++;
#endif
	if (pmap == NULL)
		return;

	if (!pmap->pm_segtab) {
		register pt_entry_t *pte;

		/* remove entries from kernel pmap */
#ifdef DIAGNOSTIC
		if (sva < VM_MIN_KERNEL_ADDRESS || eva > virtual_end)
			panic("pmap_remove: kva not in range");
#endif
		pte = kvtopte(sva);
		for (; sva < eva; sva += NBPG, pte++) {
			entry = pte->pt_entry;
			if (!(entry & PG_V))
				continue;
			if (entry & PG_WIRED)
				pmap->pm_stats.wired_count--;
			pmap->pm_stats.resident_count--;
			pmap_remove_pv(pmap, sva, entry & PG_FRAME);
#ifdef ATTR
			pmap_attributes[atop(entry & PG_FRAME)] = 0;
#endif
			pte->pt_entry = PG_NV;
			/*
			 * Flush the TLB for the given address.
			 */
			MachTLBFlushAddr(sva);
#ifdef DEBUG
			remove_stats.flushes++;
#endif
		}
		return;
	}

#ifdef DIAGNOSTIC
	if (eva > VM_MAXUSER_ADDRESS)
		panic("pmap_remove: uva not in range");
#endif
	while (sva < eva) {
		nssva = pmax_trunc_seg(sva) + NBSEG;
		if (nssva == 0 || nssva > eva)
			nssva = eva;
		/*
		 * If VA belongs to an unallocated segment,
		 * skip to the next segment boundary.
		 */
		if (!(pte = pmap_segmap(pmap, sva))) {
			sva = nssva;
			continue;
		}
		/*
		 * Invalidate every valid mapping within this segment.
		 */
		pte += (sva >> PGSHIFT) & (NPTEPG - 1);
		for (; sva < nssva; sva += NBPG, pte++) {
			entry = pte->pt_entry;
			if (!(entry & PG_V))
				continue;
			if (entry & PG_WIRED)
				pmap->pm_stats.wired_count--;
			pmap->pm_stats.resident_count--;
			pmap_remove_pv(pmap, sva, entry & PG_FRAME);
#ifdef ATTR
			pmap_attributes[atop(entry & PG_FRAME)] = 0;
#endif
			pte->pt_entry = PG_NV;
			/*
			 * Flush the TLB for the given address.
			 */
			if (pmap->pm_tlbgen == tlbpid_gen) {
				MachTLBFlushAddr(sva | (pmap->pm_tlbpid <<
					VMMACH_TLB_PID_SHIFT));
#ifdef DEBUG
				remove_stats.flushes++;
#endif
			}
		}
	}
}

/*
 *	pmap_page_protect:
 *
 *	Lower the permission for all mappings to a given page.
 */
void
pmap_page_protect(pa, prot)
	vm_offset_t pa;
	vm_prot_t prot;
{
	register pv_entry_t pv;
	register vm_offset_t va;
	int s;

#ifdef DEBUG
	if ((pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) ||
	    (prot == VM_PROT_NONE && (pmapdebug & PDB_REMOVE)))
		printf("pmap_page_protect(%x, %x)\n", pa, prot);
#endif
	if (!IS_VM_PHYSADDR(pa))
		return;

	switch (prot) {
	case VM_PROT_READ|VM_PROT_WRITE:
	case VM_PROT_ALL:
		break;

	/* copy_on_write */
	case VM_PROT_READ:
	case VM_PROT_READ|VM_PROT_EXECUTE:
		pv = pa_to_pvh(pa);
		s = splimp();
		/*
		 * Loop over all current mappings setting/clearing as appropriate.
		 */
		if (pv->pv_pmap != NULL) {
			for (; pv; pv = pv->pv_next) {
				extern vm_offset_t pager_sva, pager_eva;

				va = pv->pv_va;

				/*
				 * XXX don't write protect pager mappings
				 */
				if (va >= pager_sva && va < pager_eva)
					continue;
				pmap_protect(pv->pv_pmap, va, va + PAGE_SIZE,
					prot);
			}
		}
		splx(s);
		break;

	/* remove_all */
	default:
		pv = pa_to_pvh(pa);
		s = splimp();
		while (pv->pv_pmap != NULL) {
			pmap_remove(pv->pv_pmap, pv->pv_va,
				pv->pv_va + PAGE_SIZE);
		}
		splx(s);
	}
}

/*
 *	Set the physical protection on the
 *	specified range of this map as requested.
 */
void
pmap_protect(pmap, sva, eva, prot)
	register pmap_t pmap;
	vm_offset_t sva, eva;
	vm_prot_t prot;
{
	register vm_offset_t nssva;
	register pt_entry_t *pte;
	register unsigned entry;
	u_int p;

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT))
		printf("pmap_protect(%x, %x, %x, %x)\n", pmap, sva, eva, prot);
#endif
	if (pmap == NULL)
		return;

	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		pmap_remove(pmap, sva, eva);
		return;
	}

	p = (prot & VM_PROT_WRITE) ? PG_RW : PG_RO;

	if (!pmap->pm_segtab) {
		/*
		 * Change entries in kernel pmap.
		 * This will trap if the page is writable (in order to set
		 * the dirty bit) even if the dirty bit is already set. The
		 * optimization isn't worth the effort since this code isn't
		 * executed much. The common case is to make a user page
		 * read-only.
		 */
#ifdef DIAGNOSTIC
		if (sva < VM_MIN_KERNEL_ADDRESS || eva > virtual_end)
			panic("pmap_protect: kva not in range");
#endif
		pte = kvtopte(sva);
		for (; sva < eva; sva += NBPG, pte++) {
			entry = pte->pt_entry;
			if (!(entry & PG_V))
				continue;
			entry = (entry & ~(PG_M | PG_RO)) | p;
			pte->pt_entry = entry;
			/*
			 * Update the TLB if the given address is in the cache.
			 */
			MachTLBUpdate(sva, entry);
		}
		return;
	}

#ifdef DIAGNOSTIC
	if (eva > VM_MAXUSER_ADDRESS)
		panic("pmap_protect: uva not in range");
#endif
	while (sva < eva) {
		nssva = pmax_trunc_seg(sva) + NBSEG;
		if (nssva == 0 || nssva > eva)
			nssva = eva;
		/*
		 * If VA belongs to an unallocated segment,
		 * skip to the next segment boundary.
		 */
		if (!(pte = pmap_segmap(pmap, sva))) {
			sva = nssva;
			continue;
		}
		/*
		 * Change protection on every valid mapping within this segment.
		 */
		pte += (sva >> PGSHIFT) & (NPTEPG - 1);
		for (; sva < nssva; sva += NBPG, pte++) {
			entry = pte->pt_entry;
			if (!(entry & PG_V))
				continue;
			entry = (entry & ~(PG_M | PG_RO)) | p;
			pte->pt_entry = entry;
			/*
			 * Update the TLB if the given address is in the cache.
			 */
			if (pmap->pm_tlbgen == tlbpid_gen)
				MachTLBUpdate(sva | (pmap->pm_tlbpid <<
					VMMACH_TLB_PID_SHIFT), entry);
		}
	}
}

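/*
 * Note on the "i = pmaxpagesperpage; do { ... } while (--i != 0)" loops
 * in pmap_enter() and pmap_change_wiring() below: a machine-independent
 * VM page (PAGE_SIZE bytes) may consist of several NBPG-sized machine
 * pages, so each entry/wiring operation is repeated for every machine
 * page, and modified/referenced state is assumed uniform across them.
 * pmap_bootstrap() currently sets pmaxpagesperpage to 1, so these loops
 * normally execute once.
 */
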
/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte can not be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map NOW.
 */
void
pmap_enter(pmap, va, pa, prot, wired)
	register pmap_t pmap;
	vm_offset_t va;
	register vm_offset_t pa;
	vm_prot_t prot;
	boolean_t wired;
{
	register pt_entry_t *pte;
	register u_int npte;
	register int i, j;
	vm_page_t mem;

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
		printf("pmap_enter(%x, %x, %x, %x, %x)\n",
		       pmap, va, pa, prot, wired);
#endif
#ifdef DIAGNOSTIC
	if (!pmap)
		panic("pmap_enter: pmap");
	if (!pmap->pm_segtab) {
		enter_stats.kernel++;
		if (va < VM_MIN_KERNEL_ADDRESS || va >= virtual_end)
			panic("pmap_enter: kva");
	} else {
		enter_stats.user++;
		if (va >= VM_MAXUSER_ADDRESS)
			panic("pmap_enter: uva");
	}
	if (pa & 0x80000000)
		panic("pmap_enter: pa");
	if (!(prot & VM_PROT_READ))
		panic("pmap_enter: prot");
#endif

	if (IS_VM_PHYSADDR(pa)) {
		register pv_entry_t pv, npv;
		int s;

		if (!(prot & VM_PROT_WRITE))
			npte = PG_RO;
		else {
			register vm_page_t mem;

			mem = PHYS_TO_VM_PAGE(pa);
			if ((int)va < 0) {
				/*
				 * Don't bother to trap on kernel writes,
				 * just record page as dirty.
				 */
				npte = PG_M;
				mem->flags &= ~PG_CLEAN;
			} else
#ifdef ATTR
			if ((pmap_attributes[atop(pa)] &
			    PMAP_ATTR_MOD) || !(mem->flags & PG_CLEAN))
#else
			if (!(mem->flags & PG_CLEAN))
#endif
				npte = PG_M;
			else
				npte = 0;
		}

#ifdef DEBUG
		enter_stats.managed++;
#endif
		/*
		 * Enter the pmap and virtual address into the
		 * physical to virtual map table.
		 */
		pv = pa_to_pvh(pa);
		s = splimp();
#ifdef DEBUG
		if (pmapdebug & PDB_ENTER)
			printf("pmap_enter: pv %x: was %x/%x/%x\n",
			       pv, pv->pv_va, pv->pv_pmap, pv->pv_next);
#endif
		if (pv->pv_pmap == NULL) {
			/*
			 * No entries yet, use header as the first entry
			 */
#ifdef DEBUG
			if (pmapdebug & PDB_PVENTRY)
				printf("pmap_enter: first pv: pmap %x va %x\n",
				       pmap, va);
			enter_stats.firstpv++;
#endif
			pv->pv_va = va;
			pv->pv_pmap = pmap;
			pv->pv_next = NULL;
		} else {
			/*
			 * There is at least one other VA mapping this page.
			 * Place this entry after the header.
			 *
			 * Note: the entry may already be in the table if
			 * we are only changing the protection bits.
			 */
			for (npv = pv; npv; npv = npv->pv_next)
				if (pmap == npv->pv_pmap && va == npv->pv_va) {
#ifdef DIAGNOSTIC
					unsigned entry;

					if (!pmap->pm_segtab)
						entry = kvtopte(va)->pt_entry;
					else {
						pte = pmap_segmap(pmap, va);
						if (pte) {
							pte += (va >> PGSHIFT) &
							    (NPTEPG - 1);
							entry = pte->pt_entry;
						} else
							entry = 0;
					}
					if (!(entry & PG_V) ||
					    (entry & PG_FRAME) != pa)
						printf(
			"pmap_enter: found va %x pa %x in pv_table but != %x\n",
						       va, pa, entry);
#endif
					goto fnd;
				}
#ifdef DEBUG
			if (pmapdebug & PDB_PVENTRY)
				printf("pmap_enter: new pv: pmap %x va %x\n",
				       pmap, va);
#endif
			/* can this cause us to recurse forever? */
			npv = (pv_entry_t)
				malloc(sizeof *npv, M_VMPVENT, M_NOWAIT);
			npv->pv_va = va;
			npv->pv_pmap = pmap;
			npv->pv_next = pv->pv_next;
			pv->pv_next = npv;
#ifdef DEBUG
			if (!npv->pv_next)
				enter_stats.secondpv++;
#endif
		fnd:
			;
		}
		splx(s);
	} else {
		/*
		 * Assumption: if it is not part of our managed memory
		 * then it must be device memory which may be volatile.
		 */
#ifdef DEBUG
		enter_stats.unmanaged++;
#endif
		npte = (prot & VM_PROT_WRITE) ? (PG_M | PG_N) : (PG_RO | PG_N);
	}

	/*
	 * The only time we need to flush the cache is if we
	 * execute from a physical address and then change the data.
	 * This is the best place to do this.
	 * pmap_protect() and pmap_remove() are mostly used to switch
	 * between R/W and R/O pages.
	 * NOTE: we only support cache flush for read only text.
	 */
	if (prot == (VM_PROT_READ | VM_PROT_EXECUTE))
		MachFlushICache(MACH_PHYS_TO_CACHED(pa), PAGE_SIZE);

	if (!pmap->pm_segtab) {
		/* enter entries into kernel pmap */
		pte = kvtopte(va);
		npte |= pa | PG_V | PG_G;
		if (wired) {
			pmap->pm_stats.wired_count += pmaxpagesperpage;
			npte |= PG_WIRED;
		}
		i = pmaxpagesperpage;
		do {
			if (!(pte->pt_entry & PG_V)) {
				pmap->pm_stats.resident_count++;
			} else {
#ifdef DIAGNOSTIC
				if (pte->pt_entry & PG_WIRED)
					panic("pmap_enter: kernel wired");
#endif
			}
			/*
			 * Update the same virtual address entry.
			 */
			MachTLBUpdate(va, npte);
			pte->pt_entry = npte;
			va += NBPG;
			npte += NBPG;
			pte++;
		} while (--i != 0);
		return;
	}

	if (!(pte = pmap_segmap(pmap, va))) {
		mem = vm_page_alloc1();
		pmap_zero_page(VM_PAGE_TO_PHYS(mem));
		pmap_segmap(pmap, va) = pte = (pt_entry_t *)
			MACH_PHYS_TO_CACHED(VM_PAGE_TO_PHYS(mem));
	}
	pte += (va >> PGSHIFT) & (NPTEPG - 1);

	/*
	 * Now validate mapping with desired protection/wiring.
	 * Assume uniform modified and referenced status for all
	 * PMAX pages in a MACH page.
	 */
	npte |= pa | PG_V;
	if (wired) {
		pmap->pm_stats.wired_count += pmaxpagesperpage;
		npte |= PG_WIRED;
	}
#ifdef DEBUG
	if (pmapdebug & PDB_ENTER) {
		printf("pmap_enter: new pte %x", npte);
		if (pmap->pm_tlbgen == tlbpid_gen)
			printf(" tlbpid %d", pmap->pm_tlbpid);
		printf("\n");
	}
#endif
	i = pmaxpagesperpage;
	do {
		pte->pt_entry = npte;
		if (pmap->pm_tlbgen == tlbpid_gen)
			MachTLBUpdate(va | (pmap->pm_tlbpid <<
				VMMACH_TLB_PID_SHIFT), npte);
		va += NBPG;
		npte += NBPG;
		pte++;
	} while (--i != 0);
}

/*
 *	Routine:	pmap_change_wiring
 *	Function:	Change the wiring attribute for a map/virtual-address
 *			pair.
 *	In/out conditions:
 *		The mapping must already exist in the pmap.
 */
void
pmap_change_wiring(pmap, va, wired)
	register pmap_t pmap;
	vm_offset_t va;
	boolean_t wired;
{
	register pt_entry_t *pte;
	u_int p;
	register int i;

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_WIRING))
		printf("pmap_change_wiring(%x, %x, %x)\n", pmap, va, wired);
#endif
	if (pmap == NULL)
		return;

	p = wired ? PG_WIRED : 0;

	/*
	 * Don't need to flush the TLB since PG_WIRED is only in software.
	 */
	if (!pmap->pm_segtab) {
		/* change entries in kernel pmap */
#ifdef DIAGNOSTIC
		if (va < VM_MIN_KERNEL_ADDRESS || va >= virtual_end)
			panic("pmap_change_wiring");
#endif
		pte = kvtopte(va);
	} else {
		if (!(pte = pmap_segmap(pmap, va)))
			return;
		pte += (va >> PGSHIFT) & (NPTEPG - 1);
	}

	i = pmaxpagesperpage;
	if (!(pte->pt_entry & PG_WIRED) && p)
		pmap->pm_stats.wired_count += i;
	else if ((pte->pt_entry & PG_WIRED) && !p)
		pmap->pm_stats.wired_count -= i;
	do {
		if (pte->pt_entry & PG_V)
			pte->pt_entry = (pte->pt_entry & ~PG_WIRED) | p;
		pte++;
	} while (--i != 0);
}

/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 */
vm_offset_t
pmap_extract(pmap, va)
	register pmap_t pmap;
	vm_offset_t va;
{
	register vm_offset_t pa;

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_extract(%x, %x) -> ", pmap, va);
#endif

	if (!pmap->pm_segtab) {
#ifdef DIAGNOSTIC
		if (va < VM_MIN_KERNEL_ADDRESS || va >= virtual_end)
			panic("pmap_extract");
#endif
		pa = kvtopte(va)->pt_entry & PG_FRAME;
	} else {
		register pt_entry_t *pte;

		if (!(pte = pmap_segmap(pmap, va)))
			pa = 0;
		else {
			pte += (va >> PGSHIFT) & (NPTEPG - 1);
			pa = pte->pt_entry & PG_FRAME;
		}
	}
	if (pa)
		pa |= va & PGOFSET;

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_extract: pa %x\n", pa);
#endif
	return (pa);
}

/*
 *	Copy the range specified by src_addr/len
 *	from the source map to the range dst_addr/len
 *	in the destination map.
 *
 *	This routine is only advisory and need not do anything.
 */
void
pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
	pmap_t dst_pmap;
	pmap_t src_pmap;
	vm_offset_t dst_addr;
	vm_size_t len;
	vm_offset_t src_addr;
{

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_copy(%x, %x, %x, %x, %x)\n",
		       dst_pmap, src_pmap, dst_addr, len, src_addr);
#endif
}

/*
 *	Require that all active physical maps contain no
 *	incorrect entries NOW.  [This update includes
 *	forcing updates of any address map caching.]
 *
 *	Generally used to ensure that a thread about
 *	to run will see a semantically correct world.
 */
void
pmap_update()
{

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_update()\n");
#endif
}

/*
 *	Routine:	pmap_collect
 *	Function:
 *		Garbage collects the physical map system for
 *		pages which are no longer used.
 *		Success need not be guaranteed -- that is, there
 *		may well be pages which are not referenced, but
 *		others may be collected.
 *	Usage:
 *		Called by the pageout daemon when pages are scarce.
 */
void
pmap_collect(pmap)
	pmap_t pmap;
{

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_collect(%x)\n", pmap);
#endif
}

/*
 *	pmap_zero_page zeros the specified (machine independent)
 *	page.
 */
void
pmap_zero_page(phys)
	vm_offset_t phys;
{
	register int *p, *end;

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_zero_page(%x)\n", phys);
#endif
	p = (int *)MACH_PHYS_TO_CACHED(phys);
	end = p + PAGE_SIZE / sizeof(int);
	do {
		p[0] = 0;
		p[1] = 0;
		p[2] = 0;
		p[3] = 0;
		p += 4;
	} while (p != end);
}

/*
 *	pmap_copy_page copies the specified (machine independent)
 *	page.
 */
void
pmap_copy_page(src, dst)
	vm_offset_t src, dst;
{
	register int *s, *d, *end;
	register int tmp0, tmp1, tmp2, tmp3;

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_copy_page(%x, %x)\n", src, dst);
#endif
	s = (int *)MACH_PHYS_TO_CACHED(src);
	d = (int *)MACH_PHYS_TO_CACHED(dst);
	end = s + PAGE_SIZE / sizeof(int);
	do {
		tmp0 = s[0];
		tmp1 = s[1];
		tmp2 = s[2];
		tmp3 = s[3];
		d[0] = tmp0;
		d[1] = tmp1;
		d[2] = tmp2;
		d[3] = tmp3;
		s += 4;
		d += 4;
	} while (s != end);
}

/*
 *	Routine:	pmap_pageable
 *	Function:
 *		Make the specified pages (by pmap, offset)
 *		pageable (or not) as requested.
 *
 *		A page which is not pageable may not take
 *		a fault; therefore, its page table entry
 *		must remain valid for the duration.
 *
 *		This routine is merely advisory; pmap_enter
 *		will specify that these pages are to be wired
 *		down (or not) as appropriate.
 */
void
pmap_pageable(pmap, sva, eva, pageable)
	pmap_t pmap;
	vm_offset_t sva, eva;
	boolean_t pageable;
{

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_pageable(%x, %x, %x, %x)\n",
		       pmap, sva, eva, pageable);
#endif
}

/*
 *	Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(pa)
	vm_offset_t pa;
{

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_clear_modify(%x)\n", pa);
#endif
#ifdef ATTR
	pmap_attributes[atop(pa)] &= ~PMAP_ATTR_MOD;
#endif
}

/*
 *	pmap_clear_reference:
 *
 *	Clear the reference bit on the specified physical page.
 */
void
pmap_clear_reference(pa)
	vm_offset_t pa;
{

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_clear_reference(%x)\n", pa);
#endif
#ifdef ATTR
	pmap_attributes[atop(pa)] &= ~PMAP_ATTR_REF;
#endif
}

/*
 *	pmap_is_referenced:
 *
 *	Return whether or not the specified physical page is referenced
 *	by any physical maps.
 */
boolean_t
pmap_is_referenced(pa)
	vm_offset_t pa;
{
#ifdef ATTR
	return (pmap_attributes[atop(pa)] & PMAP_ATTR_REF);
#else
	return (FALSE);
#endif
}

/*
 *	pmap_is_modified:
 *
 *	Return whether or not the specified physical page is modified
 *	by any physical maps.
 */
boolean_t
pmap_is_modified(pa)
	vm_offset_t pa;
{
#ifdef ATTR
	return (pmap_attributes[atop(pa)] & PMAP_ATTR_MOD);
#else
	return (FALSE);
#endif
}

vm_offset_t
pmap_phys_address(ppn)
	int ppn;
{

#ifdef DEBUG
	if (pmapdebug & PDB_FOLLOW)
		printf("pmap_phys_address(%x)\n", ppn);
#endif
	return (pmax_ptob(ppn));
}

/*
 * Miscellaneous support routines
 */

/*
 * Allocate a hardware PID and return it.
 * It takes almost as much or more time to search the TLB for a
 * specific PID and flush those entries as it does to flush the entire TLB.
 * Therefore, when we allocate a new PID, we just take the next number. When
 * we run out of numbers, we flush the TLB, increment the generation count
 * and start over. PID zero is reserved for kernel use.
 * This is called only by switch().
 */
int
pmap_alloc_tlbpid(p)
	register struct proc *p;
{
	register pmap_t pmap;
	register int id;

	pmap = &p->p_vmspace->vm_pmap;
	if (pmap->pm_tlbgen != tlbpid_gen) {
		id = tlbpid_cnt;
		if (id == VMMACH_NUM_PIDS) {
			MachTLBFlush();
			/* reserve tlbpid_gen == 0 to always mean invalid */
			if (++tlbpid_gen == 0)
				tlbpid_gen = 1;
			id = 1;
		}
		tlbpid_cnt = id + 1;
		pmap->pm_tlbpid = id;
		pmap->pm_tlbgen = tlbpid_gen;
	} else
		id = pmap->pm_tlbpid;

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_TLBPID)) {
		if (curproc)
			printf("pmap_alloc_tlbpid: curproc %d '%s' ",
			       curproc->p_pid, curproc->p_comm);
		else
			printf("pmap_alloc_tlbpid: curproc <none> ");
		printf("segtab %x tlbpid %d pid %d '%s'\n",
		       pmap->pm_segtab, id, p->p_pid, p->p_comm);
	}
#endif
	return (id);
}

/*
 * Remove a physical to virtual address translation.
 */
void
pmap_remove_pv(pmap, va, pa)
	pmap_t pmap;
	vm_offset_t va, pa;
{
	register pv_entry_t pv, npv;
	int s;

#ifdef DEBUG
	if (pmapdebug & (PDB_FOLLOW|PDB_PVENTRY))
		printf("pmap_remove_pv(%x, %x, %x)\n", pmap, va, pa);
#endif
	/*
	 * Remove page from the PV table (raise IPL since we
	 * may be called at interrupt time).
	 */
	if (!IS_VM_PHYSADDR(pa))
		return;
	pv = pa_to_pvh(pa);
	s = splimp();
	/*
	 * If it is the first entry on the list, it is actually
	 * in the header and we must copy the following entry up
	 * to the header.  Otherwise we must search the list for
	 * the entry.  In either case we free the now unused entry.
	 */
	if (pmap == pv->pv_pmap && va == pv->pv_va) {
		npv = pv->pv_next;
		if (npv) {
			*pv = *npv;
			free((caddr_t)npv, M_VMPVENT);
		} else
			pv->pv_pmap = NULL;
#ifdef DEBUG
		remove_stats.pvfirst++;
#endif
	} else {
		for (npv = pv->pv_next; npv; pv = npv, npv = npv->pv_next) {
#ifdef DEBUG
			remove_stats.pvsearch++;
#endif
			if (pmap == npv->pv_pmap && va == npv->pv_va)
				goto fnd;
		}
#ifdef DIAGNOSTIC
		printf("pmap_remove_pv(%x, %x, %x) not found\n", pmap, va, pa);
		panic("pmap_remove_pv");
#endif
	fnd:
		pv->pv_next = npv->pv_next;
		free((caddr_t)npv, M_VMPVENT);
	}
	splx(s);
}

/*
 * vm_page_alloc1:
 *
 *	Allocate and return a memory cell with no associated object.
 */
vm_page_t
vm_page_alloc1()
{
	register vm_page_t mem;
	int spl;

	spl = splimp();				/* XXX */
	simple_lock(&vm_page_queue_free_lock);
	if (vm_page_queue_free.tqh_first == NULL) {
		simple_unlock(&vm_page_queue_free_lock);
		splx(spl);
		return (NULL);
	}

	mem = vm_page_queue_free.tqh_first;
	TAILQ_REMOVE(&vm_page_queue_free, mem, pageq);

	cnt.v_free_count--;
	simple_unlock(&vm_page_queue_free_lock);
	splx(spl);

	mem->flags = PG_BUSY | PG_CLEAN | PG_FAKE;
	mem->wire_count = 0;

	/*
	 * Decide if we should poke the pageout daemon.
	 * We do this if the free count is less than the low
	 * water mark, or if the free count is less than the high
	 * water mark (but above the low water mark) and the inactive
	 * count is less than its target.
	 *
	 * We don't have the counts locked ... if they change a little,
	 * it doesn't really matter.
	 */

	if (cnt.v_free_count < cnt.v_free_min ||
	    (cnt.v_free_count < cnt.v_free_target &&
	     cnt.v_inactive_count < cnt.v_inactive_target))
		thread_wakeup((int)&vm_pages_needed);
	return (mem);
}

/*
 * vm_page_free1:
 *
 *	Returns the given page to the free list,
 *	disassociating it from any VM object.
 *
 *	Object and page must be locked prior to entry.
 */
void
vm_page_free1(mem)
	register vm_page_t mem;
{

	if (mem->flags & PG_ACTIVE) {
		TAILQ_REMOVE(&vm_page_queue_active, mem, pageq);
		mem->flags &= ~PG_ACTIVE;
		cnt.v_active_count--;
	}

	if (mem->flags & PG_INACTIVE) {
		TAILQ_REMOVE(&vm_page_queue_inactive, mem, pageq);
		mem->flags &= ~PG_INACTIVE;
		cnt.v_inactive_count--;
	}

	if (!(mem->flags & PG_FICTITIOUS)) {
		int spl;

		spl = splimp();
		simple_lock(&vm_page_queue_free_lock);
		TAILQ_INSERT_TAIL(&vm_page_queue_free, mem, pageq);

		cnt.v_free_count++;
		simple_unlock(&vm_page_queue_free_lock);
		splx(spl);
	}
}