1 /* 2 * Copyright (c) 1991 Regents of the University of California. 3 * Copyright (c) 1994 John S. Dyson 4 * Copyright (c) 1994 David Greenman 5 * Copyright (c) 2003 Peter Wemm 6 * Copyright (c) 2005-2008 Alan L. Cox <alc@cs.rice.edu> 7 * Copyright (c) 2008-2019 The DragonFly Project. 8 * Copyright (c) 2008, 2009 Jordan Gordeev. 9 * All rights reserved. 10 * 11 * This code is derived from software contributed to Berkeley by 12 * the Systems Programming Group of the University of Utah Computer 13 * Science Department and William Jolitz of UUNET Technologies Inc. 14 * 15 * Redistribution and use in source and binary forms, with or without 16 * modification, are permitted provided that the following conditions 17 * are met: 18 * 1. Redistributions of source code must retain the above copyright 19 * notice, this list of conditions and the following disclaimer. 20 * 2. Redistributions in binary form must reproduce the above copyright 21 * notice, this list of conditions and the following disclaimer in the 22 * documentation and/or other materials provided with the distribution. 23 * 3. All advertising materials mentioning features or use of this software 24 * must display the following acknowledgement: 25 * This product includes software developed by the University of 26 * California, Berkeley and its contributors. 27 * 4. Neither the name of the University nor the names of its contributors 28 * may be used to endorse or promote products derived from this software 29 * without specific prior written permission. 30 * 31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 34 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 41 * SUCH DAMAGE. 42 * 43 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 44 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $ 45 */ 46 47 /* 48 * Manages physical address maps. 
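 *
 * This is the virtual kernel (vkernel) flavor of the pmap code: page
 * table entries are software VPTEs, and invalidations and address space
 * setup are reflected into the real (host) kernel via the pmap_inval_*()
 * helpers and the vmspace_*() system calls used later in this file.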
49 */ 50 51 #include "opt_msgbuf.h" 52 53 #include <sys/param.h> 54 #include <sys/systm.h> 55 #include <sys/kernel.h> 56 #include <sys/proc.h> 57 #include <sys/msgbuf.h> 58 #include <sys/vmmeter.h> 59 #include <sys/mman.h> 60 #include <sys/vmspace.h> 61 62 #include <vm/vm.h> 63 #include <vm/vm_param.h> 64 #include <sys/sysctl.h> 65 #include <sys/lock.h> 66 #include <vm/vm_kern.h> 67 #include <vm/vm_page.h> 68 #include <vm/vm_map.h> 69 #include <vm/vm_object.h> 70 #include <vm/vm_extern.h> 71 #include <vm/vm_pageout.h> 72 #include <vm/vm_pager.h> 73 #include <vm/vm_zone.h> 74 75 #include <sys/thread2.h> 76 #include <sys/spinlock2.h> 77 #include <vm/vm_page2.h> 78 79 #include <machine/cputypes.h> 80 #include <machine/md_var.h> 81 #include <machine/specialreg.h> 82 #include <machine/smp.h> 83 #include <machine/globaldata.h> 84 #include <machine/pcb.h> 85 #include <machine/pmap.h> 86 #include <machine/pmap_inval.h> 87 88 #include <ddb/ddb.h> 89 90 #include <stdio.h> 91 #include <assert.h> 92 #include <stdlib.h> 93 94 #define PMAP_KEEP_PDIRS 95 #ifndef PMAP_SHPGPERPROC 96 #define PMAP_SHPGPERPROC 1000 97 #endif 98 99 #if defined(DIAGNOSTIC) 100 #define PMAP_DIAGNOSTIC 101 #endif 102 103 #define MINPV 2048 104 105 #if !defined(PMAP_DIAGNOSTIC) 106 #define PMAP_INLINE __inline 107 #else 108 #define PMAP_INLINE 109 #endif 110 111 /* 112 * Get PDEs and PTEs for user/kernel address space 113 */ 114 static pd_entry_t *pmap_pde(pmap_t pmap, vm_offset_t va); 115 #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) 116 117 #define pmap_pde_v(pte) ((*(pd_entry_t *)pte & VPTE_V) != 0) 118 #define pmap_pte_w(pte) ((*(pt_entry_t *)pte & VPTE_WIRED) != 0) 119 #define pmap_pte_m(pte) ((*(pt_entry_t *)pte & VPTE_M) != 0) 120 #define pmap_pte_u(pte) ((*(pt_entry_t *)pte & VPTE_A) != 0) 121 #define pmap_pte_v(pte) ((*(pt_entry_t *)pte & VPTE_V) != 0) 122 123 /* 124 * Given a map and a machine independent protection code, 125 * convert to a vax protection code. 126 */ 127 #define pte_prot(m, p) \ 128 (protection_codes[p & (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)]) 129 static uint64_t protection_codes[8]; 130 131 static struct pmap kernel_pmap_store; 132 struct pmap *kernel_pmap = &kernel_pmap_store; 133 134 static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? 
*/ 135 136 static struct vm_object kptobj; 137 static int nkpt; 138 139 static uint64_t KPDphys; /* phys addr of kernel level 2 */ 140 uint64_t KPDPphys; /* phys addr of kernel level 3 */ 141 uint64_t KPML4phys; /* phys addr of kernel level 4 */ 142 143 extern void *vkernel_stack; 144 145 /* 146 * Data for the pv entry allocation mechanism 147 */ 148 static vm_zone_t pvzone; 149 static struct vm_zone pvzone_store; 150 static vm_pindex_t pv_entry_count = 0; 151 static vm_pindex_t pv_entry_max = 0; 152 static vm_pindex_t pv_entry_high_water = 0; 153 static int pmap_pagedaemon_waken = 0; 154 static struct pv_entry *pvinit; 155 156 /* 157 * All those kernel PT submaps that BSD is so fond of 158 */ 159 pt_entry_t *CMAP1 = NULL, *ptmmap; 160 caddr_t CADDR1 = NULL; 161 static pt_entry_t *msgbufmap; 162 163 uint64_t KPTphys; 164 165 static PMAP_INLINE void free_pv_entry (pv_entry_t pv); 166 static pv_entry_t get_pv_entry (void); 167 static void x86_64_protection_init (void); 168 static __inline void pmap_clearbit (vm_page_t m, int bit); 169 170 static void pmap_remove_all (vm_page_t m); 171 static int pmap_remove_pte (struct pmap *pmap, pt_entry_t *ptq, 172 pt_entry_t oldpte, vm_offset_t sva); 173 static void pmap_remove_page (struct pmap *pmap, vm_offset_t va); 174 static int pmap_remove_entry (struct pmap *pmap, vm_page_t m, 175 vm_offset_t va); 176 static boolean_t pmap_testbit (vm_page_t m, int bit); 177 static void pmap_insert_entry (pmap_t pmap, vm_offset_t va, 178 vm_page_t mpte, vm_page_t m, pv_entry_t); 179 180 static vm_page_t pmap_allocpte (pmap_t pmap, vm_offset_t va); 181 182 static int pmap_release_free_page (pmap_t pmap, vm_page_t p); 183 static vm_page_t _pmap_allocpte (pmap_t pmap, vm_pindex_t ptepindex); 184 static vm_page_t pmap_page_lookup (vm_object_t object, vm_pindex_t pindex); 185 static int pmap_unuse_pt (pmap_t, vm_offset_t, vm_page_t); 186 187 static int 188 pv_entry_compare(pv_entry_t pv1, pv_entry_t pv2) 189 { 190 if (pv1->pv_va < pv2->pv_va) 191 return(-1); 192 if (pv1->pv_va > pv2->pv_va) 193 return(1); 194 return(0); 195 } 196 197 RB_GENERATE2(pv_entry_rb_tree, pv_entry, pv_entry, 198 pv_entry_compare, vm_offset_t, pv_va); 199 200 static __inline vm_pindex_t 201 pmap_pt_pindex(vm_offset_t va) 202 { 203 return va >> PDRSHIFT; 204 } 205 206 static __inline vm_pindex_t 207 pmap_pte_index(vm_offset_t va) 208 { 209 return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1)); 210 } 211 212 static __inline vm_pindex_t 213 pmap_pde_index(vm_offset_t va) 214 { 215 return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1)); 216 } 217 218 static __inline vm_pindex_t 219 pmap_pdpe_index(vm_offset_t va) 220 { 221 return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1)); 222 } 223 224 static __inline vm_pindex_t 225 pmap_pml4e_index(vm_offset_t va) 226 { 227 return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1)); 228 } 229 230 /* Return a pointer to the PML4 slot that corresponds to a VA */ 231 static __inline pml4_entry_t * 232 pmap_pml4e(pmap_t pmap, vm_offset_t va) 233 { 234 return (&pmap->pm_pml4[pmap_pml4e_index(va)]); 235 } 236 237 /* Return a pointer to the PDP slot that corresponds to a VA */ 238 static __inline pdp_entry_t * 239 pmap_pml4e_to_pdpe(pml4_entry_t *pml4e, vm_offset_t va) 240 { 241 pdp_entry_t *pdpe; 242 243 pdpe = (pdp_entry_t *)PHYS_TO_DMAP(*pml4e & VPTE_FRAME); 244 return (&pdpe[pmap_pdpe_index(va)]); 245 } 246 247 /* Return a pointer to the PDP slot that corresponds to a VA */ 248 static __inline pdp_entry_t * 249 pmap_pdpe(pmap_t pmap, vm_offset_t va) 250 { 
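	/*
	 * Worked example of the va decomposition performed by the
	 * pmap_*_index() helpers above, assuming the usual x86_64
	 * constants (PAGE_SHIFT 12, PDRSHIFT 21, PDPSHIFT 30,
	 * PML4SHIFT 39, 512 entries per level) and the arbitrary
	 * user address va = 0x00007f8000200000:
	 *
	 *	pmap_pml4e_index(va) = (va >> 39) & 511 = 255
	 *	pmap_pdpe_index(va)  = (va >> 30) & 511 = 0
	 *	pmap_pde_index(va)   = (va >> 21) & 511 = 1
	 *	pmap_pte_index(va)   = (va >> 12) & 511 = 0
	 */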
251 pml4_entry_t *pml4e; 252 253 pml4e = pmap_pml4e(pmap, va); 254 if ((*pml4e & VPTE_V) == 0) 255 return NULL; 256 return (pmap_pml4e_to_pdpe(pml4e, va)); 257 } 258 259 /* Return a pointer to the PD slot that corresponds to a VA */ 260 static __inline pd_entry_t * 261 pmap_pdpe_to_pde(pdp_entry_t *pdpe, vm_offset_t va) 262 { 263 pd_entry_t *pde; 264 265 pde = (pd_entry_t *)PHYS_TO_DMAP(*pdpe & VPTE_FRAME); 266 return (&pde[pmap_pde_index(va)]); 267 } 268 269 /* Return a pointer to the PD slot that corresponds to a VA */ 270 static __inline pd_entry_t * 271 pmap_pde(pmap_t pmap, vm_offset_t va) 272 { 273 pdp_entry_t *pdpe; 274 275 pdpe = pmap_pdpe(pmap, va); 276 if (pdpe == NULL || (*pdpe & VPTE_V) == 0) 277 return NULL; 278 return (pmap_pdpe_to_pde(pdpe, va)); 279 } 280 281 /* Return a pointer to the PT slot that corresponds to a VA */ 282 static __inline pt_entry_t * 283 pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va) 284 { 285 pt_entry_t *pte; 286 287 pte = (pt_entry_t *)PHYS_TO_DMAP(*pde & VPTE_FRAME); 288 return (&pte[pmap_pte_index(va)]); 289 } 290 291 /* 292 * Hold pt_m for page table scans to prevent it from getting reused out 293 * from under us across blocking conditions in the body of the loop. 294 */ 295 static __inline 296 vm_page_t 297 pmap_hold_pt_page(pd_entry_t *pde, vm_offset_t va) 298 { 299 pt_entry_t pte; 300 vm_page_t pt_m; 301 302 pte = (pt_entry_t)*pde; 303 KKASSERT(pte != 0); 304 pt_m = PHYS_TO_VM_PAGE(pte & VPTE_FRAME); 305 vm_page_hold(pt_m); 306 307 return pt_m; 308 } 309 310 /* Return a pointer to the PT slot that corresponds to a VA */ 311 static __inline pt_entry_t * 312 pmap_pte(pmap_t pmap, vm_offset_t va) 313 { 314 pd_entry_t *pde; 315 316 pde = pmap_pde(pmap, va); 317 if (pde == NULL || (*pde & VPTE_V) == 0) 318 return NULL; 319 if ((*pde & VPTE_PS) != 0) /* compat with x86 pmap_pte() */ 320 return ((pt_entry_t *)pde); 321 return (pmap_pde_to_pte(pde, va)); 322 } 323 324 static PMAP_INLINE pt_entry_t * 325 vtopte(vm_offset_t va) 326 { 327 pt_entry_t *x; 328 x = pmap_pte(kernel_pmap, va); 329 assert(x != NULL); 330 return x; 331 } 332 333 static __inline pd_entry_t * 334 vtopde(vm_offset_t va) 335 { 336 pd_entry_t *x; 337 x = pmap_pde(kernel_pmap, va); 338 assert(x != NULL); 339 return x; 340 } 341 342 /* 343 * Returns the physical address translation from va for a user address. 344 * (vm_paddr_t)-1 is returned on failure. 
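 *
 * A minimal illustrative caller (hypothetical, not part of this file):
 *
 *	vm_paddr_t pa;
 *
 *	pa = uservtophys(uva);
 *	if (pa == (vm_paddr_t)-1)
 *		return (EFAULT);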
345 */ 346 vm_paddr_t 347 uservtophys(vm_offset_t va) 348 { 349 struct vmspace *vm = curproc->p_vmspace; 350 vm_page_t m; 351 vm_paddr_t pa; 352 int error; 353 int busy; 354 355 /* XXX No idea how to handle this case in a simple way, just abort */ 356 if (PAGE_SIZE - (va & PAGE_MASK) < sizeof(u_int)) 357 return ((vm_paddr_t)-1); 358 359 m = vm_fault_page(&vm->vm_map, trunc_page(va), 360 VM_PROT_READ|VM_PROT_WRITE, 361 VM_FAULT_NORMAL, 362 &error, &busy); 363 if (error) 364 return ((vm_paddr_t)-1); 365 366 pa = VM_PAGE_TO_PHYS(m) | (va & PAGE_MASK); 367 if (busy) 368 vm_page_wakeup(m); 369 else 370 vm_page_unhold(m); 371 372 return pa; 373 } 374 375 static uint64_t 376 allocpages(vm_paddr_t *firstaddr, int n) 377 { 378 uint64_t ret; 379 380 ret = *firstaddr; 381 /*bzero((void *)ret, n * PAGE_SIZE); not mapped yet */ 382 *firstaddr += n * PAGE_SIZE; 383 return (ret); 384 } 385 386 static void 387 create_pagetables(vm_paddr_t *firstaddr, int64_t ptov_offset) 388 { 389 int i; 390 pml4_entry_t *KPML4virt; 391 pdp_entry_t *KPDPvirt; 392 pd_entry_t *KPDvirt; 393 pt_entry_t *KPTvirt; 394 int kpml4i = pmap_pml4e_index(ptov_offset); 395 int kpdpi = pmap_pdpe_index(ptov_offset); 396 int kpdi = pmap_pde_index(ptov_offset); 397 398 /* 399 * Calculate NKPT - number of kernel page tables. We have to 400 * accomodoate prealloction of the vm_page_array, dump bitmap, 401 * MSGBUF_SIZE, and other stuff. Be generous. 402 * 403 * Maxmem is in pages. 404 */ 405 nkpt = (Maxmem * (sizeof(struct vm_page) * 2) + MSGBUF_SIZE) / NBPDR; 406 /* 407 * Allocate pages 408 */ 409 KPML4phys = allocpages(firstaddr, 1); 410 KPDPphys = allocpages(firstaddr, NKPML4E); 411 KPDphys = allocpages(firstaddr, NKPDPE); 412 KPTphys = allocpages(firstaddr, nkpt); 413 414 KPML4virt = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); 415 KPDPvirt = (pdp_entry_t *)PHYS_TO_DMAP(KPDPphys); 416 KPDvirt = (pd_entry_t *)PHYS_TO_DMAP(KPDphys); 417 KPTvirt = (pt_entry_t *)PHYS_TO_DMAP(KPTphys); 418 419 bzero(KPML4virt, 1 * PAGE_SIZE); 420 bzero(KPDPvirt, NKPML4E * PAGE_SIZE); 421 bzero(KPDvirt, NKPDPE * PAGE_SIZE); 422 bzero(KPTvirt, nkpt * PAGE_SIZE); 423 424 /* Now map the page tables at their location within PTmap */ 425 for (i = 0; i < nkpt; i++) { 426 KPDvirt[i + kpdi] = KPTphys + (i << PAGE_SHIFT); 427 KPDvirt[i + kpdi] |= VPTE_RW | VPTE_V | VPTE_U; 428 } 429 430 /* And connect up the PD to the PDP */ 431 for (i = 0; i < NKPDPE; i++) { 432 KPDPvirt[i + kpdpi] = KPDphys + (i << PAGE_SHIFT); 433 KPDPvirt[i + kpdpi] |= VPTE_RW | VPTE_V | VPTE_U; 434 } 435 436 /* And recursively map PML4 to itself in order to get PTmap */ 437 KPML4virt[PML4PML4I] = KPML4phys; 438 KPML4virt[PML4PML4I] |= VPTE_RW | VPTE_V | VPTE_U; 439 440 /* Connect the KVA slot up to the PML4 */ 441 KPML4virt[kpml4i] = KPDPphys; 442 KPML4virt[kpml4i] |= VPTE_RW | VPTE_V | VPTE_U; 443 } 444 445 /* 446 * Typically used to initialize a fictitious page by vm/device_pager.c 447 */ 448 void 449 pmap_page_init(struct vm_page *m) 450 { 451 vm_page_init(m); 452 TAILQ_INIT(&m->md.pv_list); 453 } 454 455 /* 456 * Bootstrap the system enough to run with virtual memory. 457 * 458 * On x86_64 this is called after mapping has already been enabled 459 * and just syncs the pmap module with what has already been done. 
460 * [We can't call it easily with mapping off since the kernel is not 461 * mapped with PA == VA, hence we would have to relocate every address 462 * from the linked base (virtual) address "KERNBASE" to the actual 463 * (physical) address starting relative to 0] 464 */ 465 void 466 pmap_bootstrap(vm_paddr_t *firstaddr, int64_t ptov_offset) 467 { 468 vm_offset_t va; 469 pt_entry_t *pte; 470 471 /* 472 * Create an initial set of page tables to run the kernel in. 473 */ 474 create_pagetables(firstaddr, ptov_offset); 475 476 virtual_start = KvaStart; 477 virtual_end = KvaEnd; 478 479 /* 480 * Initialize protection array. 481 */ 482 x86_64_protection_init(); 483 484 /* 485 * The kernel's pmap is statically allocated so we don't have to use 486 * pmap_create, which is unlikely to work correctly at this part of 487 * the boot sequence (XXX and which no longer exists). 488 * 489 * The kernel_pmap's pm_pteobj is used only for locking and not 490 * for mmu pages. 491 */ 492 kernel_pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); 493 kernel_pmap->pm_count = 1; 494 /* don't allow deactivation */ 495 CPUMASK_ASSALLONES(kernel_pmap->pm_active); 496 kernel_pmap->pm_pteobj = NULL; /* see pmap_init */ 497 RB_INIT(&kernel_pmap->pm_pvroot); 498 spin_init(&kernel_pmap->pm_spin, "pmapbootstrap"); 499 500 /* 501 * Reserve some special page table entries/VA space for temporary 502 * mapping of pages. 503 */ 504 #define SYSMAP(c, p, v, n) \ 505 v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); 506 507 va = virtual_start; 508 pte = pmap_pte(kernel_pmap, va); 509 /* 510 * CMAP1/CMAP2 are used for zeroing and copying pages. 511 */ 512 SYSMAP(caddr_t, CMAP1, CADDR1, 1) 513 514 #if 0 /* JGV */ 515 /* 516 * Crashdump maps. 517 */ 518 SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS); 519 #endif 520 521 /* 522 * ptvmmap is used for reading arbitrary physical pages via 523 * /dev/mem. 524 */ 525 SYSMAP(caddr_t, ptmmap, ptvmmap, 1) 526 527 /* 528 * msgbufp is used to map the system message buffer. 529 * XXX msgbufmap is not used. 530 */ 531 SYSMAP(struct msgbuf *, msgbufmap, msgbufp, 532 atop(round_page(MSGBUF_SIZE))) 533 534 virtual_start = va; 535 536 *CMAP1 = 0; 537 cpu_invltlb(); 538 } 539 540 /* 541 * Initialize the pmap module. 542 * Called by vm_init, to initialize any structures that the pmap 543 * system needs to map virtual memory. 544 * pmap_init has been enhanced to support in a fairly consistant 545 * way, discontiguous physical memory. 546 */ 547 void 548 pmap_init(void) 549 { 550 vm_pindex_t i; 551 vm_pindex_t initial_pvs; 552 553 /* 554 * object for kernel page table pages 555 */ 556 /* JG I think the number can be arbitrary */ 557 vm_object_init(&kptobj, 5); 558 kernel_pmap->pm_pteobj = &kptobj; 559 560 /* 561 * Allocate memory for random pmap data structures. Includes the 562 * pv_head_table. 563 */ 564 for (i = 0; i < vm_page_array_size; i++) { 565 vm_page_t m; 566 567 m = &vm_page_array[i]; 568 TAILQ_INIT(&m->md.pv_list); 569 m->md.pv_list_count = 0; 570 } 571 572 /* 573 * init the pv free list 574 */ 575 initial_pvs = vm_page_array_size; 576 if (initial_pvs < MINPV) 577 initial_pvs = MINPV; 578 pvzone = &pvzone_store; 579 pvinit = (struct pv_entry *) 580 kmem_alloc(kernel_map, 581 initial_pvs * sizeof (struct pv_entry), 582 VM_SUBSYS_PVENTRY); 583 zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit, 584 initial_pvs); 585 586 /* 587 * Now it is safe to enable pv_table recording. 
588 */ 589 pmap_initialized = TRUE; 590 } 591 592 /* 593 * Initialize the address space (zone) for the pv_entries. Set a 594 * high water mark so that the system can recover from excessive 595 * numbers of pv entries. 596 */ 597 void 598 pmap_init2(void) 599 { 600 vm_pindex_t shpgperproc = PMAP_SHPGPERPROC; 601 602 TUNABLE_LONG_FETCH("vm.pmap.shpgperproc", &shpgperproc); 603 pv_entry_max = shpgperproc * maxproc + vm_page_array_size; 604 TUNABLE_LONG_FETCH("vm.pmap.pv_entries", &pv_entry_max); 605 pv_entry_high_water = 9 * (pv_entry_max / 10); 606 zinitna(pvzone, NULL, 0, pv_entry_max, ZONE_INTERRUPT); 607 } 608 609 610 /*************************************************** 611 * Low level helper routines..... 612 ***************************************************/ 613 614 /* 615 * The modification bit is not tracked for any pages in this range. XXX 616 * such pages in this maps should always use pmap_k*() functions and not 617 * be managed anyhow. 618 * 619 * XXX User and kernel address spaces are independant for virtual kernels, 620 * this function only applies to the kernel pmap. 621 */ 622 static void 623 pmap_track_modified(pmap_t pmap, vm_offset_t va) 624 { 625 KKASSERT(pmap != kernel_pmap || 626 va < clean_sva || va >= clean_eva); 627 } 628 629 /* 630 * Extract the physical page address associated with the map/VA pair. 631 * 632 * No requirements. 633 */ 634 vm_paddr_t 635 pmap_extract(pmap_t pmap, vm_offset_t va, void **handlep) 636 { 637 vm_paddr_t rtval; 638 pt_entry_t *pte; 639 pd_entry_t pde, *pdep; 640 641 vm_object_hold(pmap->pm_pteobj); 642 rtval = 0; 643 pdep = pmap_pde(pmap, va); 644 if (pdep != NULL) { 645 pde = *pdep; 646 if (pde) { 647 if ((pde & VPTE_PS) != 0) { 648 /* JGV */ 649 rtval = (pde & PG_PS_FRAME) | (va & PDRMASK); 650 } else { 651 pte = pmap_pde_to_pte(pdep, va); 652 rtval = (*pte & VPTE_FRAME) | (va & PAGE_MASK); 653 } 654 } 655 } 656 if (handlep) 657 *handlep = NULL; /* XXX */ 658 vm_object_drop(pmap->pm_pteobj); 659 660 return rtval; 661 } 662 663 void 664 pmap_extract_done(void *handle) 665 { 666 pmap_t pmap; 667 668 if (handle) { 669 pmap = handle; 670 vm_object_drop(pmap->pm_pteobj); 671 } 672 } 673 674 /* 675 * Similar to extract but checks protections, SMP-friendly short-cut for 676 * vm_fault_page[_quick](). 677 * 678 * WARNING! THE RETURNED PAGE IS ONLY HELD AND NEITHER IT NOR ITS TARGET 679 * DATA IS SUITABLE FOR WRITING. Writing can interfere with 680 * pageouts flushes, msync, etc. The hold_count is not enough 681 * to avoid races against pageouts and other flush code doesn't 682 * care about hold_count. 683 */ 684 vm_page_t 685 pmap_fault_page_quick(pmap_t pmap __unused, vm_offset_t vaddr __unused, 686 vm_prot_t prot __unused, int *busyp __unused) 687 { 688 return(NULL); 689 } 690 691 /* 692 * Routine: pmap_kextract 693 * Function: 694 * Extract the physical page address associated 695 * kernel virtual address. 696 */ 697 vm_paddr_t 698 pmap_kextract(vm_offset_t va) 699 { 700 pd_entry_t pde; 701 vm_paddr_t pa; 702 703 KKASSERT(va >= KvaStart && va < KvaEnd); 704 705 /* 706 * The DMAP region is not included in [KvaStart, KvaEnd) 707 */ 708 #if 0 709 if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { 710 pa = DMAP_TO_PHYS(va); 711 } else { 712 #endif 713 pde = *vtopde(va); 714 if (pde & VPTE_PS) { 715 /* JGV */ 716 pa = (pde & PG_PS_FRAME) | (va & PDRMASK); 717 } else { 718 /* 719 * Beware of a concurrent promotion that changes the 720 * PDE at this point! 
For example, vtopte() must not 721 * be used to access the PTE because it would use the 722 * new PDE. It is, however, safe to use the old PDE 723 * because the page table page is preserved by the 724 * promotion. 725 */ 726 pa = *pmap_pde_to_pte(&pde, va); 727 pa = (pa & VPTE_FRAME) | (va & PAGE_MASK); 728 } 729 #if 0 730 } 731 #endif 732 return pa; 733 } 734 735 /*************************************************** 736 * Low level mapping routines..... 737 ***************************************************/ 738 739 /* 740 * Enter a mapping into kernel_pmap. Mappings created in this fashion 741 * are not managed. Mappings must be immediately accessible on all cpus. 742 * 743 * Call pmap_inval_pte() to invalidate the virtual pte and clean out the 744 * real pmap and handle related races before storing the new vpte. The 745 * new semantics for kenter require use to do an UNCONDITIONAL invalidation, 746 * because the entry may have previously been cleared without an invalidation. 747 */ 748 void 749 pmap_kenter(vm_offset_t va, vm_paddr_t pa) 750 { 751 pt_entry_t *ptep; 752 pt_entry_t npte; 753 754 KKASSERT(va >= KvaStart && va < KvaEnd); 755 npte = pa | VPTE_RW | VPTE_V | VPTE_U; 756 ptep = vtopte(va); 757 758 #if 1 759 pmap_inval_pte(ptep, kernel_pmap, va); 760 #else 761 if (*pte & VPTE_V) 762 pmap_inval_pte(ptep, kernel_pmap, va); 763 #endif 764 atomic_swap_long(ptep, npte); 765 } 766 767 /* 768 * Enter an unmanaged KVA mapping for the private use of the current 769 * cpu only. 770 * 771 * It is illegal for the mapping to be accessed by other cpus without 772 * proper invalidation. 773 */ 774 int 775 pmap_kenter_quick(vm_offset_t va, vm_paddr_t pa) 776 { 777 pt_entry_t *ptep; 778 pt_entry_t npte; 779 int res; 780 781 KKASSERT(va >= KvaStart && va < KvaEnd); 782 783 npte = (vpte_t)pa | VPTE_RW | VPTE_V | VPTE_U; 784 ptep = vtopte(va); 785 786 #if 1 787 pmap_inval_pte_quick(ptep, kernel_pmap, va); 788 res = 1; 789 #else 790 /* FUTURE */ 791 res = (*ptep != 0); 792 if (*pte & VPTE_V) 793 pmap_inval_pte(pte, kernel_pmap, va); 794 #endif 795 atomic_swap_long(ptep, npte); 796 797 return res; 798 } 799 800 /* 801 * Invalidation will occur later, ok to be lazy here. 802 */ 803 int 804 pmap_kenter_noinval(vm_offset_t va, vm_paddr_t pa) 805 { 806 pt_entry_t *ptep; 807 pt_entry_t npte; 808 int res; 809 810 KKASSERT(va >= KvaStart && va < KvaEnd); 811 812 npte = (vpte_t)pa | VPTE_RW | VPTE_V | VPTE_U; 813 ptep = vtopte(va); 814 #if 1 815 res = 1; 816 #else 817 /* FUTURE */ 818 res = (*ptep != 0); 819 #endif 820 atomic_swap_long(ptep, npte); 821 822 return res; 823 } 824 825 /* 826 * Remove an unmanaged mapping created with pmap_kenter*(). 827 */ 828 void 829 pmap_kremove(vm_offset_t va) 830 { 831 pt_entry_t *ptep; 832 833 KKASSERT(va >= KvaStart && va < KvaEnd); 834 835 ptep = vtopte(va); 836 atomic_swap_long(ptep, 0); 837 pmap_inval_pte(ptep, kernel_pmap, va); 838 } 839 840 /* 841 * Remove an unmanaged mapping created with pmap_kenter*() but synchronize 842 * only with this cpu. 843 * 844 * Unfortunately because we optimize new entries by testing VPTE_V later 845 * on, we actually still have to synchronize with all the cpus. XXX maybe 846 * store a junk value and test against 0 in the other places instead? 
847 */ 848 void 849 pmap_kremove_quick(vm_offset_t va) 850 { 851 pt_entry_t *ptep; 852 853 KKASSERT(va >= KvaStart && va < KvaEnd); 854 855 ptep = vtopte(va); 856 atomic_swap_long(ptep, 0); 857 pmap_inval_pte(ptep, kernel_pmap, va); /* NOT _quick */ 858 } 859 860 /* 861 * Invalidation will occur later, ok to be lazy here. 862 */ 863 void 864 pmap_kremove_noinval(vm_offset_t va) 865 { 866 pt_entry_t *ptep; 867 868 KKASSERT(va >= KvaStart && va < KvaEnd); 869 870 ptep = vtopte(va); 871 atomic_swap_long(ptep, 0); 872 } 873 874 /* 875 * Used to map a range of physical addresses into kernel 876 * virtual address space. 877 * 878 * For now, VM is already on, we only need to map the 879 * specified memory. 880 */ 881 vm_offset_t 882 pmap_map(vm_offset_t *virtp, vm_paddr_t start, vm_paddr_t end, int prot) 883 { 884 return PHYS_TO_DMAP(start); 885 } 886 887 /* 888 * Map a set of unmanaged VM pages into KVM. 889 */ 890 static __inline void 891 _pmap_qenter(vm_offset_t beg_va, vm_page_t *m, int count, int doinval) 892 { 893 vm_offset_t end_va; 894 vm_offset_t va; 895 896 end_va = beg_va + count * PAGE_SIZE; 897 KKASSERT(beg_va >= KvaStart && end_va <= KvaEnd); 898 899 for (va = beg_va; va < end_va; va += PAGE_SIZE) { 900 pt_entry_t *ptep; 901 902 ptep = vtopte(va); 903 atomic_swap_long(ptep, VM_PAGE_TO_PHYS(*m) | 904 VPTE_RW | VPTE_V | VPTE_U); 905 ++m; 906 } 907 if (doinval) 908 pmap_invalidate_range(kernel_pmap, beg_va, end_va); 909 /* pmap_inval_pte(pte, kernel_pmap, va); */ 910 } 911 912 void 913 pmap_qenter(vm_offset_t beg_va, vm_page_t *m, int count) 914 { 915 _pmap_qenter(beg_va, m, count, 1); 916 } 917 918 void 919 pmap_qenter_noinval(vm_offset_t beg_va, vm_page_t *m, int count) 920 { 921 _pmap_qenter(beg_va, m, count, 0); 922 } 923 924 /* 925 * Undo the effects of pmap_qenter*(). 926 */ 927 void 928 pmap_qremove(vm_offset_t beg_va, int count) 929 { 930 vm_offset_t end_va; 931 vm_offset_t va; 932 933 end_va = beg_va + count * PAGE_SIZE; 934 KKASSERT(beg_va >= KvaStart && end_va < KvaEnd); 935 936 for (va = beg_va; va < end_va; va += PAGE_SIZE) { 937 pt_entry_t *ptep; 938 939 ptep = vtopte(va); 940 atomic_swap_long(ptep, 0); 941 } 942 pmap_invalidate_range(kernel_pmap, beg_va, end_va); 943 } 944 945 /* 946 * Unlike the real pmap code, we can't avoid calling the real-kernel. 947 */ 948 void 949 pmap_qremove_quick(vm_offset_t va, int count) 950 { 951 pmap_qremove(va, count); 952 } 953 954 void 955 pmap_qremove_noinval(vm_offset_t va, int count) 956 { 957 pmap_qremove(va, count); 958 } 959 960 /* 961 * This routine works like vm_page_lookup() but also blocks as long as the 962 * page is busy. This routine does not busy the page it returns. 963 * 964 * Unless the caller is managing objects whos pages are in a known state, 965 * the call should be made with a critical section held so the page's object 966 * association remains valid on return. 967 */ 968 static vm_page_t 969 pmap_page_lookup(vm_object_t object, vm_pindex_t pindex) 970 { 971 vm_page_t m; 972 973 ASSERT_LWKT_TOKEN_HELD(vm_object_token(object)); 974 m = vm_page_lookup_busy_wait(object, pindex, TRUE, "pplookp"); 975 976 return(m); 977 } 978 979 /* 980 * Create a new thread and optionally associate it with a (new) process. 981 * NOTE! the new thread's cpu may not equal the current cpu. 
982 */ 983 void 984 pmap_init_thread(thread_t td) 985 { 986 /* enforce pcb placement */ 987 td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_size) - 1; 988 td->td_savefpu = &td->td_pcb->pcb_save; 989 td->td_sp = (char *)td->td_pcb - 16; /* JG is -16 needed on x86_64? */ 990 } 991 992 /* 993 * This routine directly affects the fork perf for a process. 994 */ 995 void 996 pmap_init_proc(struct proc *p) 997 { 998 } 999 1000 /* 1001 * Unwire a page table which has been removed from the pmap. We own the 1002 * wire_count, so the page cannot go away. The page representing the page 1003 * table is passed in unbusied and must be busied if we cannot trivially 1004 * unwire it. 1005 * 1006 * XXX NOTE! This code is not usually run because we do not currently 1007 * implement dynamic page table page removal. The page in 1008 * its parent assumes at least 1 wire count, so no call to this 1009 * function ever sees a wire count less than 2. 1010 */ 1011 static int 1012 pmap_unwire_pgtable(pmap_t pmap, vm_offset_t va, vm_page_t m) 1013 { 1014 /* 1015 * Try to unwire optimally. If non-zero is returned the wire_count 1016 * is 1 and we must busy the page to unwire it. 1017 */ 1018 if (vm_page_unwire_quick(m) == 0) 1019 return 0; 1020 1021 vm_page_busy_wait(m, TRUE, "pmuwpt"); 1022 KASSERT(m->queue == PQ_NONE, 1023 ("_pmap_unwire_pgtable: %p->queue != PQ_NONE", m)); 1024 1025 if (m->wire_count == 1) { 1026 /* 1027 * Unmap the page table page. 1028 */ 1029 /* pmap_inval_add(info, pmap, -1); */ 1030 1031 if (m->pindex >= (NUPT_TOTAL + NUPD_TOTAL)) { 1032 /* PDP page */ 1033 pml4_entry_t *pml4; 1034 pml4 = pmap_pml4e(pmap, va); 1035 *pml4 = 0; 1036 } else if (m->pindex >= NUPT_TOTAL) { 1037 /* PD page */ 1038 pdp_entry_t *pdp; 1039 pdp = pmap_pdpe(pmap, va); 1040 *pdp = 0; 1041 } else { 1042 /* PT page */ 1043 pd_entry_t *pd; 1044 pd = pmap_pde(pmap, va); 1045 *pd = 0; 1046 } 1047 1048 KKASSERT(pmap->pm_stats.resident_count > 0); 1049 atomic_add_long(&pmap->pm_stats.resident_count, -1); 1050 1051 if (pmap->pm_ptphint == m) 1052 pmap->pm_ptphint = NULL; 1053 1054 if (m->pindex < NUPT_TOTAL) { 1055 /* We just released a PT, unhold the matching PD */ 1056 vm_page_t pdpg; 1057 1058 pdpg = PHYS_TO_VM_PAGE(*pmap_pdpe(pmap, va) & 1059 VPTE_FRAME); 1060 pmap_unwire_pgtable(pmap, va, pdpg); 1061 } 1062 if (m->pindex >= NUPT_TOTAL && 1063 m->pindex < (NUPT_TOTAL + NUPD_TOTAL)) { 1064 /* We just released a PD, unhold the matching PDP */ 1065 vm_page_t pdppg; 1066 1067 pdppg = PHYS_TO_VM_PAGE(*pmap_pml4e(pmap, va) & 1068 VPTE_FRAME); 1069 pmap_unwire_pgtable(pmap, va, pdppg); 1070 } 1071 1072 /* 1073 * This was our last wire, the page had better be unwired 1074 * after we decrement wire_count. 1075 * 1076 * FUTURE NOTE: shared page directory page could result in 1077 * multiple wire counts. 1078 */ 1079 vm_page_unwire(m, 0); 1080 KKASSERT(m->wire_count == 0); 1081 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 1082 vm_page_flash(m); 1083 vm_page_free(m); 1084 return 1; 1085 } else { 1086 /* XXX SMP race to 1 if not holding vmobj */ 1087 vm_page_unwire(m, 0); 1088 vm_page_wakeup(m); 1089 return 0; 1090 } 1091 } 1092 1093 /* 1094 * After removing a page table entry, this routine is used to 1095 * conditionally free the page, and manage the hold/wire counts. 1096 * 1097 * If not NULL the caller owns a wire_count on mpte, so it can't disappear. 1098 * If NULL the caller owns a wire_count on what would be the mpte, we must 1099 * look it up. 
1100 */ 1101 static int 1102 pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) 1103 { 1104 vm_pindex_t ptepindex; 1105 1106 ASSERT_LWKT_TOKEN_HELD(vm_object_token(pmap->pm_pteobj)); 1107 1108 if (mpte == NULL) { 1109 /* 1110 * page table pages in the kernel_pmap are not managed. 1111 */ 1112 if (pmap == kernel_pmap) 1113 return(0); 1114 ptepindex = pmap_pt_pindex(va); 1115 if (pmap->pm_ptphint && 1116 (pmap->pm_ptphint->pindex == ptepindex)) { 1117 mpte = pmap->pm_ptphint; 1118 } else { 1119 mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex); 1120 pmap->pm_ptphint = mpte; 1121 vm_page_wakeup(mpte); 1122 } 1123 } 1124 return pmap_unwire_pgtable(pmap, va, mpte); 1125 } 1126 1127 /* 1128 * Initialize pmap0/vmspace0 . Since process 0 never enters user mode we 1129 * just dummy it up so it works well enough for fork(). 1130 * 1131 * In DragonFly, process pmaps may only be used to manipulate user address 1132 * space, never kernel address space. 1133 */ 1134 void 1135 pmap_pinit0(struct pmap *pmap) 1136 { 1137 pmap_pinit(pmap); 1138 } 1139 1140 /* 1141 * Initialize a preallocated and zeroed pmap structure, 1142 * such as one in a vmspace structure. 1143 */ 1144 void 1145 pmap_pinit(struct pmap *pmap) 1146 { 1147 vm_page_t ptdpg; 1148 1149 /* 1150 * No need to allocate page table space yet but we do need a valid 1151 * page directory table. 1152 */ 1153 if (pmap->pm_pml4 == NULL) { 1154 pmap->pm_pml4 = (pml4_entry_t *) 1155 kmem_alloc_pageable(kernel_map, PAGE_SIZE, 1156 VM_SUBSYS_PML4); 1157 } 1158 1159 /* 1160 * Allocate an object for the ptes 1161 */ 1162 if (pmap->pm_pteobj == NULL) 1163 pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, NUPT_TOTAL + NUPD_TOTAL + NUPDP_TOTAL + 1); 1164 1165 /* 1166 * Allocate the page directory page, unless we already have 1167 * one cached. If we used the cached page the wire_count will 1168 * already be set appropriately. 1169 */ 1170 if ((ptdpg = pmap->pm_pdirm) == NULL) { 1171 ptdpg = vm_page_grab(pmap->pm_pteobj, 1172 NUPT_TOTAL + NUPD_TOTAL + NUPDP_TOTAL, 1173 VM_ALLOC_NORMAL | VM_ALLOC_RETRY | 1174 VM_ALLOC_ZERO); 1175 pmap->pm_pdirm = ptdpg; 1176 vm_page_flag_clear(ptdpg, PG_MAPPED | PG_WRITEABLE); 1177 vm_page_wire(ptdpg); 1178 vm_page_wakeup(ptdpg); 1179 pmap_kenter((vm_offset_t)pmap->pm_pml4, VM_PAGE_TO_PHYS(ptdpg)); 1180 } 1181 pmap->pm_count = 1; 1182 CPUMASK_ASSZERO(pmap->pm_active); 1183 pmap->pm_ptphint = NULL; 1184 RB_INIT(&pmap->pm_pvroot); 1185 spin_init(&pmap->pm_spin, "pmapinit"); 1186 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1187 pmap->pm_stats.resident_count = 1; 1188 pmap->pm_stats.wired_count = 1; 1189 } 1190 1191 /* 1192 * Clean up a pmap structure so it can be physically freed. This routine 1193 * is called by the vmspace dtor function. A great deal of pmap data is 1194 * left passively mapped to improve vmspace management so we have a bit 1195 * of cleanup work to do here. 1196 * 1197 * No requirements. 
1198 */ 1199 void 1200 pmap_puninit(pmap_t pmap) 1201 { 1202 vm_page_t p; 1203 1204 KKASSERT(CPUMASK_TESTZERO(pmap->pm_active)); 1205 if ((p = pmap->pm_pdirm) != NULL) { 1206 KKASSERT(pmap->pm_pml4 != NULL); 1207 pmap_kremove((vm_offset_t)pmap->pm_pml4); 1208 vm_page_busy_wait(p, TRUE, "pgpun"); 1209 vm_page_unwire(p, 0); 1210 vm_page_flag_clear(p, PG_MAPPED | PG_WRITEABLE); 1211 vm_page_free(p); 1212 pmap->pm_pdirm = NULL; 1213 atomic_add_long(&pmap->pm_stats.wired_count, -1); 1214 KKASSERT(pmap->pm_stats.wired_count == 0); 1215 } 1216 if (pmap->pm_pml4) { 1217 kmem_free(kernel_map, (vm_offset_t)pmap->pm_pml4, PAGE_SIZE); 1218 pmap->pm_pml4 = NULL; 1219 } 1220 if (pmap->pm_pteobj) { 1221 vm_object_deallocate(pmap->pm_pteobj); 1222 pmap->pm_pteobj = NULL; 1223 } 1224 } 1225 1226 /* 1227 * This function is now unused (used to add the pmap to the pmap_list) 1228 */ 1229 void 1230 pmap_pinit2(struct pmap *pmap) 1231 { 1232 } 1233 1234 /* 1235 * Attempt to release and free a vm_page in a pmap. Returns 1 on success, 1236 * 0 on failure (if the procedure had to sleep). 1237 * 1238 * When asked to remove the page directory page itself, we actually just 1239 * leave it cached so we do not have to incur the SMP inval overhead of 1240 * removing the kernel mapping. pmap_puninit() will take care of it. 1241 */ 1242 static int 1243 pmap_release_free_page(struct pmap *pmap, vm_page_t p) 1244 { 1245 /* 1246 * This code optimizes the case of freeing non-busy 1247 * page-table pages. Those pages are zero now, and 1248 * might as well be placed directly into the zero queue. 1249 */ 1250 if (vm_page_busy_try(p, TRUE)) { 1251 vm_page_sleep_busy(p, TRUE, "pmaprl"); 1252 return 1; 1253 } 1254 1255 /* 1256 * Remove the page table page from the processes address space. 1257 */ 1258 if (p->pindex == NUPT_TOTAL + NUPD_TOTAL + NUPDP_TOTAL) { 1259 /* 1260 * We are the pml4 table itself. 1261 */ 1262 /* XXX anything to do here? */ 1263 } else if (p->pindex >= (NUPT_TOTAL + NUPD_TOTAL)) { 1264 /* 1265 * We are a PDP page. 1266 * We look for the PML4 entry that points to us. 1267 */ 1268 vm_page_t m4; 1269 pml4_entry_t *pml4; 1270 int idx; 1271 1272 m4 = vm_page_lookup(pmap->pm_pteobj, 1273 NUPT_TOTAL + NUPD_TOTAL + NUPDP_TOTAL); 1274 KKASSERT(m4 != NULL); 1275 pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m4)); 1276 idx = (p->pindex - (NUPT_TOTAL + NUPD_TOTAL)) % NPML4EPG; 1277 KKASSERT(pml4[idx] != 0); 1278 if (pml4[idx] == 0) 1279 kprintf("pmap_release: Unmapped PML4\n"); 1280 pml4[idx] = 0; 1281 vm_page_unwire_quick(m4); 1282 } else if (p->pindex >= NUPT_TOTAL) { 1283 /* 1284 * We are a PD page. 1285 * We look for the PDP entry that points to us. 1286 */ 1287 vm_page_t m3; 1288 pdp_entry_t *pdp; 1289 int idx; 1290 1291 m3 = vm_page_lookup(pmap->pm_pteobj, 1292 NUPT_TOTAL + NUPD_TOTAL + 1293 (p->pindex - NUPT_TOTAL) / NPDPEPG); 1294 KKASSERT(m3 != NULL); 1295 pdp = (pdp_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m3)); 1296 idx = (p->pindex - NUPT_TOTAL) % NPDPEPG; 1297 KKASSERT(pdp[idx] != 0); 1298 if (pdp[idx] == 0) 1299 kprintf("pmap_release: Unmapped PDP %d\n", idx); 1300 pdp[idx] = 0; 1301 vm_page_unwire_quick(m3); 1302 } else { 1303 /* We are a PT page. 1304 * We look for the PD entry that points to us. 
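		 * For a hypothetical PT page with pindex p, the parent PD
		 * page sits at object index NUPT_TOTAL + p / NPDEPG and the
		 * slot within it is p % NPDEPG, matching the vm_page_lookup()
		 * and idx computation below (e.g. with NPDEPG == 512,
		 * p == 1000 gives PD page NUPT_TOTAL + 1, slot 488).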
1305 */ 1306 vm_page_t m2; 1307 pd_entry_t *pd; 1308 int idx; 1309 1310 m2 = vm_page_lookup(pmap->pm_pteobj, 1311 NUPT_TOTAL + p->pindex / NPDEPG); 1312 KKASSERT(m2 != NULL); 1313 pd = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m2)); 1314 idx = p->pindex % NPDEPG; 1315 if (pd[idx] == 0) 1316 kprintf("pmap_release: Unmapped PD %d\n", idx); 1317 pd[idx] = 0; 1318 vm_page_unwire_quick(m2); 1319 } 1320 KKASSERT(pmap->pm_stats.resident_count > 0); 1321 atomic_add_long(&pmap->pm_stats.resident_count, -1); 1322 1323 if (p->wire_count > 1) { 1324 panic("pmap_release: freeing held pt page " 1325 "pmap=%p pg=%p dmap=%p pi=%ld {%ld,%ld,%ld}", 1326 pmap, p, (void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(p)), 1327 p->pindex, NUPT_TOTAL, NUPD_TOTAL, NUPDP_TOTAL); 1328 } 1329 1330 if (pmap->pm_ptphint == p) 1331 pmap->pm_ptphint = NULL; 1332 1333 /* 1334 * We leave the top-level page table page cached, wired, and mapped in 1335 * the pmap until the dtor function (pmap_puninit()) gets called. 1336 * However, still clean it up. 1337 */ 1338 if (p->pindex == NUPT_TOTAL + NUPD_TOTAL + NUPDP_TOTAL) { 1339 bzero(pmap->pm_pml4, PAGE_SIZE); 1340 vm_page_wakeup(p); 1341 } else { 1342 vm_page_unwire(p, 0); 1343 vm_page_flag_clear(p, PG_MAPPED | PG_WRITEABLE); 1344 vm_page_free(p); 1345 atomic_add_long(&pmap->pm_stats.wired_count, -1); 1346 } 1347 return 0; 1348 } 1349 1350 /* 1351 * Locate the requested PT, PD, or PDP page table page. 1352 * 1353 * Returns a busied page, caller must vm_page_wakeup() when done. 1354 */ 1355 static vm_page_t 1356 _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex) 1357 { 1358 vm_page_t m; 1359 vm_page_t pm; 1360 vm_pindex_t pindex; 1361 pt_entry_t *ptep; 1362 pt_entry_t data; 1363 1364 /* 1365 * Find or fabricate a new pagetable page. A non-zero wire_count 1366 * indicates that the page has already been mapped into its parent. 1367 */ 1368 m = vm_page_grab(pmap->pm_pteobj, ptepindex, 1369 VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY); 1370 if (m->wire_count != 0) 1371 return m; 1372 1373 /* 1374 * Map the page table page into its parent, giving it 1 wire count. 1375 */ 1376 vm_page_wire(m); 1377 vm_page_unqueue(m); 1378 atomic_add_long(&pmap->pm_stats.resident_count, 1); 1379 vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); 1380 1381 data = VM_PAGE_TO_PHYS(m) | 1382 VPTE_RW | VPTE_V | VPTE_U | VPTE_A | VPTE_M | VPTE_WIRED; 1383 atomic_add_long(&pmap->pm_stats.wired_count, 1); 1384 1385 if (ptepindex >= (NUPT_TOTAL + NUPD_TOTAL)) { 1386 /* 1387 * Map PDP into the PML4 1388 */ 1389 pindex = ptepindex - (NUPT_TOTAL + NUPD_TOTAL); 1390 pindex &= (NUPDP_TOTAL - 1); 1391 ptep = (pt_entry_t *)pmap->pm_pml4; 1392 pm = NULL; 1393 } else if (ptepindex >= NUPT_TOTAL) { 1394 /* 1395 * Map PD into its PDP 1396 */ 1397 pindex = (ptepindex - NUPT_TOTAL) >> NPDPEPGSHIFT; 1398 pindex += NUPT_TOTAL + NUPD_TOTAL; 1399 pm = _pmap_allocpte(pmap, pindex); 1400 pindex = (ptepindex - NUPT_TOTAL) & (NPDPEPG - 1); 1401 ptep = (void *)PHYS_TO_DMAP(pm->phys_addr); 1402 } else { 1403 /* 1404 * Map PT into its PD 1405 */ 1406 pindex = ptepindex >> NPDPEPGSHIFT; 1407 pindex += NUPT_TOTAL; 1408 pm = _pmap_allocpte(pmap, pindex); 1409 pindex = ptepindex & (NPTEPG - 1); 1410 ptep = (void *)PHYS_TO_DMAP(pm->phys_addr); 1411 } 1412 1413 /* 1414 * Install the pte in (pm). (m) prevents races. 
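	 * The entry at (ptep + pindex) is pointed at the new page table
	 * page just below, and the extra vm_page_wire_quick() on (pm)
	 * accounts for the child page table page now referencing its
	 * parent.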
1415 */ 1416 ptep += pindex; 1417 data = atomic_swap_long(ptep, data); 1418 if (pm) { 1419 vm_page_wire_quick(pm); 1420 vm_page_wakeup(pm); 1421 } 1422 pmap->pm_ptphint = pm; 1423 1424 return m; 1425 } 1426 1427 /* 1428 * Determine the page table page required to access the VA in the pmap 1429 * and allocate it if necessary. Return a held vm_page_t for the page. 1430 * 1431 * Only used with user pmaps. 1432 */ 1433 static vm_page_t 1434 pmap_allocpte(pmap_t pmap, vm_offset_t va) 1435 { 1436 vm_pindex_t ptepindex; 1437 vm_page_t m; 1438 1439 ASSERT_LWKT_TOKEN_HELD(vm_object_token(pmap->pm_pteobj)); 1440 1441 /* 1442 * Calculate pagetable page index, and return the PT page to 1443 * the caller. 1444 */ 1445 ptepindex = pmap_pt_pindex(va); 1446 m = _pmap_allocpte(pmap, ptepindex); 1447 1448 return m; 1449 } 1450 1451 /*************************************************** 1452 * Pmap allocation/deallocation routines. 1453 ***************************************************/ 1454 1455 /* 1456 * Release any resources held by the given physical map. 1457 * Called when a pmap initialized by pmap_pinit is being released. 1458 * Should only be called if the map contains no valid mappings. 1459 */ 1460 static int pmap_release_callback(struct vm_page *p, void *data); 1461 1462 void 1463 pmap_release(struct pmap *pmap) 1464 { 1465 vm_object_t object = pmap->pm_pteobj; 1466 struct rb_vm_page_scan_info info; 1467 1468 KKASSERT(pmap != kernel_pmap); 1469 1470 #if defined(DIAGNOSTIC) 1471 if (object->ref_count != 1) 1472 panic("pmap_release: pteobj reference count != 1"); 1473 #endif 1474 1475 info.pmap = pmap; 1476 info.object = object; 1477 1478 KASSERT(CPUMASK_TESTZERO(pmap->pm_active), 1479 ("pmap %p still active! %016jx", 1480 pmap, 1481 (uintmax_t)CPUMASK_LOWMASK(pmap->pm_active))); 1482 1483 vm_object_hold(object); 1484 do { 1485 info.error = 0; 1486 info.mpte = NULL; 1487 info.limit = object->generation; 1488 1489 vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL, 1490 pmap_release_callback, &info); 1491 if (info.error == 0 && info.mpte) { 1492 if (pmap_release_free_page(pmap, info.mpte)) 1493 info.error = 1; 1494 } 1495 } while (info.error); 1496 1497 pmap->pm_ptphint = NULL; 1498 1499 KASSERT((pmap->pm_stats.wired_count == (pmap->pm_pdirm != NULL)), 1500 ("pmap_release: dangling count %p %ld", 1501 pmap, pmap->pm_stats.wired_count)); 1502 1503 vm_object_drop(object); 1504 } 1505 1506 static int 1507 pmap_release_callback(struct vm_page *p, void *data) 1508 { 1509 struct rb_vm_page_scan_info *info = data; 1510 1511 if (p->pindex == NUPT_TOTAL + NUPD_TOTAL + NUPDP_TOTAL) { 1512 info->mpte = p; 1513 return(0); 1514 } 1515 if (pmap_release_free_page(info->pmap, p)) { 1516 info->error = 1; 1517 return(-1); 1518 } 1519 if (info->object->generation != info->limit) { 1520 info->error = 1; 1521 return(-1); 1522 } 1523 return(0); 1524 } 1525 1526 /* 1527 * Grow the number of kernel page table entries, if needed. 1528 * 1529 * kernel_map must be locked exclusively by the caller. 
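 *
 * Growth proceeds one page table page at a time (PAGE_SIZE * NPTEPG
 * bytes of KVA, 2MB with 4KB pages and 512 ptes per page), allocating a
 * fresh page directory page first whenever pmap_pde() reports that the
 * covering page directory is not yet present.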
1530 */ 1531 void 1532 pmap_growkernel(vm_offset_t kstart, vm_offset_t kend) 1533 { 1534 vm_offset_t addr; 1535 vm_paddr_t paddr; 1536 vm_offset_t ptppaddr; 1537 vm_page_t nkpg; 1538 pd_entry_t *pde, newpdir; 1539 pdp_entry_t newpdp; 1540 1541 addr = kend; 1542 1543 vm_object_hold(&kptobj); 1544 if (kernel_vm_end == 0) { 1545 kernel_vm_end = KvaStart; 1546 nkpt = 0; 1547 while ((*pmap_pde(kernel_pmap, kernel_vm_end) & VPTE_V) != 0) { 1548 kernel_vm_end = 1549 rounddown2(kernel_vm_end + PAGE_SIZE * NPTEPG, 1550 PAGE_SIZE * NPTEPG); 1551 nkpt++; 1552 if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { 1553 kernel_vm_end = vm_map_max(kernel_map); 1554 break; 1555 } 1556 } 1557 } 1558 addr = roundup2(addr, PAGE_SIZE * NPTEPG); 1559 if (addr - 1 >= vm_map_max(kernel_map)) 1560 addr = vm_map_max(kernel_map); 1561 while (kernel_vm_end < addr) { 1562 pde = pmap_pde(kernel_pmap, kernel_vm_end); 1563 if (pde == NULL) { 1564 /* We need a new PDP entry */ 1565 nkpg = vm_page_alloc(&kptobj, nkpt, 1566 VM_ALLOC_NORMAL | 1567 VM_ALLOC_SYSTEM | 1568 VM_ALLOC_INTERRUPT); 1569 if (nkpg == NULL) { 1570 panic("pmap_growkernel: no memory to " 1571 "grow kernel"); 1572 } 1573 paddr = VM_PAGE_TO_PHYS(nkpg); 1574 pmap_zero_page(paddr); 1575 newpdp = (pdp_entry_t)(paddr | 1576 VPTE_V | VPTE_RW | VPTE_U | 1577 VPTE_A | VPTE_M | VPTE_WIRED); 1578 *pmap_pdpe(kernel_pmap, kernel_vm_end) = newpdp; 1579 atomic_add_long(&kernel_pmap->pm_stats.wired_count, 1); 1580 nkpt++; 1581 continue; /* try again */ 1582 } 1583 if ((*pde & VPTE_V) != 0) { 1584 kernel_vm_end = 1585 rounddown2(kernel_vm_end + PAGE_SIZE * NPTEPG, 1586 PAGE_SIZE * NPTEPG); 1587 if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { 1588 kernel_vm_end = vm_map_max(kernel_map); 1589 break; 1590 } 1591 continue; 1592 } 1593 1594 /* 1595 * This index is bogus, but out of the way 1596 */ 1597 nkpg = vm_page_alloc(&kptobj, nkpt, 1598 VM_ALLOC_NORMAL | 1599 VM_ALLOC_SYSTEM | 1600 VM_ALLOC_INTERRUPT); 1601 if (nkpg == NULL) 1602 panic("pmap_growkernel: no memory to grow kernel"); 1603 1604 vm_page_wire(nkpg); 1605 ptppaddr = VM_PAGE_TO_PHYS(nkpg); 1606 pmap_zero_page(ptppaddr); 1607 newpdir = (pd_entry_t)(ptppaddr | 1608 VPTE_V | VPTE_RW | VPTE_U | 1609 VPTE_A | VPTE_M | VPTE_WIRED); 1610 *pmap_pde(kernel_pmap, kernel_vm_end) = newpdir; 1611 atomic_add_long(&kernel_pmap->pm_stats.wired_count, 1); 1612 nkpt++; 1613 1614 kernel_vm_end = 1615 rounddown2(kernel_vm_end + PAGE_SIZE * NPTEPG, 1616 PAGE_SIZE * NPTEPG); 1617 if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { 1618 kernel_vm_end = vm_map_max(kernel_map); 1619 break; 1620 } 1621 } 1622 vm_object_drop(&kptobj); 1623 } 1624 1625 /* 1626 * Add a reference to the specified pmap. 1627 * 1628 * No requirements. 1629 */ 1630 void 1631 pmap_reference(pmap_t pmap) 1632 { 1633 if (pmap) 1634 atomic_add_int(&pmap->pm_count, 1); 1635 } 1636 1637 /************************************************************************ 1638 * VMSPACE MANAGEMENT * 1639 ************************************************************************ 1640 * 1641 * The VMSPACE management we do in our virtual kernel must be reflected 1642 * in the real kernel. This is accomplished by making vmspace system 1643 * calls to the real kernel. 
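 *
 * Concretely, cpu_vmspace_alloc() below does this with vmspace_create(),
 * vmspace_mmap() and vmspace_mcontrol(MADV_SETMAP), and
 * cpu_vmspace_free() undoes it again with vmspace_destroy().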
1644 */ 1645 void 1646 cpu_vmspace_alloc(struct vmspace *vm) 1647 { 1648 int r; 1649 void *rp; 1650 vpte_t vpte; 1651 1652 #define USER_SIZE (VM_MAX_USER_ADDRESS - VM_MIN_USER_ADDRESS) 1653 1654 if (vmspace_create(&vm->vm_pmap, 0, NULL) < 0) 1655 panic("vmspace_create() failed"); 1656 1657 rp = vmspace_mmap(&vm->vm_pmap, VM_MIN_USER_ADDRESS, USER_SIZE, 1658 PROT_READ|PROT_WRITE|PROT_EXEC, 1659 MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED, 1660 MemImageFd, 0); 1661 if (rp == MAP_FAILED) 1662 panic("vmspace_mmap: failed"); 1663 vmspace_mcontrol(&vm->vm_pmap, VM_MIN_USER_ADDRESS, USER_SIZE, 1664 MADV_NOSYNC, 0); 1665 vpte = VM_PAGE_TO_PHYS(vmspace_pmap(vm)->pm_pdirm) | 1666 VPTE_RW | VPTE_V | VPTE_U; 1667 r = vmspace_mcontrol(&vm->vm_pmap, VM_MIN_USER_ADDRESS, USER_SIZE, 1668 MADV_SETMAP, vpte); 1669 if (r < 0) 1670 panic("vmspace_mcontrol: failed"); 1671 } 1672 1673 void 1674 cpu_vmspace_free(struct vmspace *vm) 1675 { 1676 if (vmspace_destroy(&vm->vm_pmap) < 0) 1677 panic("vmspace_destroy() failed"); 1678 } 1679 1680 /*************************************************** 1681 * page management routines. 1682 ***************************************************/ 1683 1684 /* 1685 * free the pv_entry back to the free list. This function may be 1686 * called from an interrupt. 1687 */ 1688 static __inline void 1689 free_pv_entry(pv_entry_t pv) 1690 { 1691 atomic_add_long(&pv_entry_count, -1); 1692 zfree(pvzone, pv); 1693 } 1694 1695 /* 1696 * get a new pv_entry, allocating a block from the system 1697 * when needed. This function may be called from an interrupt. 1698 */ 1699 static pv_entry_t 1700 get_pv_entry(void) 1701 { 1702 atomic_add_long(&pv_entry_count, 1); 1703 if (pv_entry_high_water && 1704 (pv_entry_count > pv_entry_high_water) && 1705 atomic_swap_int(&pmap_pagedaemon_waken, 1) == 0) { 1706 wakeup(&vm_pages_needed); 1707 } 1708 return zalloc(pvzone); 1709 } 1710 1711 /* 1712 * This routine is very drastic, but can save the system 1713 * in a pinch. 1714 * 1715 * No requirements. 1716 */ 1717 void 1718 pmap_collect(void) 1719 { 1720 int i; 1721 vm_page_t m; 1722 static int warningdone=0; 1723 1724 if (pmap_pagedaemon_waken == 0) 1725 return; 1726 pmap_pagedaemon_waken = 0; 1727 1728 if (warningdone < 5) { 1729 kprintf("pmap_collect: collecting pv entries -- " 1730 "suggest increasing PMAP_SHPGPERPROC\n"); 1731 warningdone++; 1732 } 1733 1734 for (i = 0; i < vm_page_array_size; i++) { 1735 m = &vm_page_array[i]; 1736 if (m->wire_count || m->hold_count) 1737 continue; 1738 if (vm_page_busy_try(m, TRUE) == 0) { 1739 if (m->wire_count == 0 && m->hold_count == 0) { 1740 pmap_remove_all(m); 1741 } 1742 vm_page_wakeup(m); 1743 } 1744 } 1745 } 1746 1747 1748 /* 1749 * If it is the first entry on the list, it is actually 1750 * in the header and we must copy the following entry up 1751 * to the header. Otherwise we must search the list for 1752 * the entry. In either case we free the now unused entry. 1753 * 1754 * pmap->pm_pteobj must be held and (m) must be spin-locked by the caller. 1755 */ 1756 static int 1757 pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va) 1758 { 1759 pv_entry_t pv; 1760 int rtval; 1761 1762 vm_page_spin_lock(m); 1763 pv = pv_entry_rb_tree_RB_LOOKUP(&pmap->pm_pvroot, va); 1764 1765 /* 1766 * Note that pv_ptem is NULL if the page table page itself is not 1767 * managed, even if the page being removed IS managed. 
1768 */ 1769 rtval = 0; 1770 if (pv) { 1771 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1772 if (TAILQ_EMPTY(&m->md.pv_list)) 1773 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 1774 m->md.pv_list_count--; 1775 KKASSERT(m->md.pv_list_count >= 0); 1776 pv_entry_rb_tree_RB_REMOVE(&pmap->pm_pvroot, pv); 1777 atomic_add_int(&pmap->pm_generation, 1); 1778 vm_page_spin_unlock(m); 1779 rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); 1780 free_pv_entry(pv); 1781 } else { 1782 vm_page_spin_unlock(m); 1783 kprintf("pmap_remove_entry: could not find " 1784 "pmap=%p m=%p va=%016jx\n", 1785 pmap, m, va); 1786 } 1787 return rtval; 1788 } 1789 1790 /* 1791 * Create a pv entry for page at pa for (pmap, va). If the page table page 1792 * holding the VA is managed, mpte will be non-NULL. 1793 * 1794 * pmap->pm_pteobj must be held and (m) must be spin-locked by the caller. 1795 */ 1796 static void 1797 pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m, 1798 pv_entry_t pv) 1799 { 1800 pv->pv_va = va; 1801 pv->pv_pmap = pmap; 1802 pv->pv_ptem = mpte; 1803 1804 m->md.pv_list_count++; 1805 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 1806 pv = pv_entry_rb_tree_RB_INSERT(&pmap->pm_pvroot, pv); 1807 vm_page_flag_set(m, PG_MAPPED); 1808 KKASSERT(pv == NULL); 1809 } 1810 1811 /* 1812 * pmap_remove_pte: do the things to unmap a page in a process 1813 * 1814 * Caller holds pmap->pm_pteobj and holds the associated page table 1815 * page busy to prevent races. 1816 */ 1817 static int 1818 pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, pt_entry_t oldpte, 1819 vm_offset_t va) 1820 { 1821 vm_page_t m; 1822 int error; 1823 1824 if (ptq) 1825 oldpte = pmap_inval_loadandclear(ptq, pmap, va); 1826 1827 if (oldpte & VPTE_WIRED) 1828 atomic_add_long(&pmap->pm_stats.wired_count, -1); 1829 KKASSERT(pmap->pm_stats.wired_count >= 0); 1830 1831 #if 0 1832 /* 1833 * Machines that don't support invlpg, also don't support 1834 * PG_G. XXX PG_G is disabled for SMP so don't worry about 1835 * the SMP case. 1836 */ 1837 if (oldpte & PG_G) 1838 cpu_invlpg((void *)va); 1839 #endif 1840 KKASSERT(pmap->pm_stats.resident_count > 0); 1841 atomic_add_long(&pmap->pm_stats.resident_count, -1); 1842 if (oldpte & VPTE_MANAGED) { 1843 m = PHYS_TO_VM_PAGE(oldpte); 1844 1845 /* 1846 * NOTE: pmap_remove_entry() will spin-lock the page 1847 */ 1848 if (oldpte & VPTE_M) { 1849 #if defined(PMAP_DIAGNOSTIC) 1850 if (pmap_nw_modified(oldpte)) { 1851 kprintf("pmap_remove: modified page not " 1852 "writable: va: 0x%lx, pte: 0x%lx\n", 1853 va, oldpte); 1854 } 1855 #endif 1856 pmap_track_modified(pmap, va); 1857 vm_page_dirty(m); 1858 } 1859 if (oldpte & VPTE_A) 1860 vm_page_flag_set(m, PG_REFERENCED); 1861 error = pmap_remove_entry(pmap, m, va); 1862 } else { 1863 error = pmap_unuse_pt(pmap, va, NULL); 1864 } 1865 return error; 1866 } 1867 1868 /* 1869 * pmap_remove_page: 1870 * 1871 * Remove a single page from a process address space. 1872 * 1873 * This function may not be called from an interrupt if the pmap is 1874 * not kernel_pmap. 1875 * 1876 * Caller holds pmap->pm_pteobj 1877 */ 1878 static void 1879 pmap_remove_page(struct pmap *pmap, vm_offset_t va) 1880 { 1881 pt_entry_t *pte; 1882 1883 pte = pmap_pte(pmap, va); 1884 if (pte == NULL) 1885 return; 1886 if ((*pte & VPTE_V) == 0) 1887 return; 1888 pmap_remove_pte(pmap, pte, 0, va); 1889 } 1890 1891 /* 1892 * Remove the given range of addresses from the specified map. 1893 * 1894 * It is assumed that the start and end are properly rounded to 1895 * the page size. 
1896 * 1897 * This function may not be called from an interrupt if the pmap is 1898 * not kernel_pmap. 1899 * 1900 * No requirements. 1901 */ 1902 void 1903 pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) 1904 { 1905 vm_offset_t va_next; 1906 pml4_entry_t *pml4e; 1907 pdp_entry_t *pdpe; 1908 pd_entry_t ptpaddr, *pde; 1909 pt_entry_t *pte; 1910 vm_page_t pt_m; 1911 1912 if (pmap == NULL) 1913 return; 1914 1915 vm_object_hold(pmap->pm_pteobj); 1916 KKASSERT(pmap->pm_stats.resident_count >= 0); 1917 if (pmap->pm_stats.resident_count == 0) { 1918 vm_object_drop(pmap->pm_pteobj); 1919 return; 1920 } 1921 1922 /* 1923 * special handling of removing one page. a very 1924 * common operation and easy to short circuit some 1925 * code. 1926 */ 1927 if (sva + PAGE_SIZE == eva) { 1928 pde = pmap_pde(pmap, sva); 1929 if (pde && (*pde & VPTE_PS) == 0) { 1930 pmap_remove_page(pmap, sva); 1931 vm_object_drop(pmap->pm_pteobj); 1932 return; 1933 } 1934 } 1935 1936 for (; sva < eva; sva = va_next) { 1937 pml4e = pmap_pml4e(pmap, sva); 1938 if ((*pml4e & VPTE_V) == 0) { 1939 va_next = (sva + NBPML4) & ~PML4MASK; 1940 if (va_next < sva) 1941 va_next = eva; 1942 continue; 1943 } 1944 1945 pdpe = pmap_pml4e_to_pdpe(pml4e, sva); 1946 if ((*pdpe & VPTE_V) == 0) { 1947 va_next = (sva + NBPDP) & ~PDPMASK; 1948 if (va_next < sva) 1949 va_next = eva; 1950 continue; 1951 } 1952 1953 /* 1954 * Calculate index for next page table. 1955 */ 1956 va_next = (sva + NBPDR) & ~PDRMASK; 1957 if (va_next < sva) 1958 va_next = eva; 1959 1960 pde = pmap_pdpe_to_pde(pdpe, sva); 1961 ptpaddr = *pde; 1962 1963 /* 1964 * Weed out invalid mappings. 1965 */ 1966 if (ptpaddr == 0) 1967 continue; 1968 1969 /* 1970 * Check for large page. 1971 */ 1972 if ((ptpaddr & VPTE_PS) != 0) { 1973 /* JG FreeBSD has more complex treatment here */ 1974 KKASSERT(*pde != 0); 1975 pmap_inval_pde(pde, pmap, sva); 1976 atomic_add_long(&pmap->pm_stats.resident_count, 1977 -NBPDR / PAGE_SIZE); 1978 continue; 1979 } 1980 1981 /* 1982 * Limit our scan to either the end of the va represented 1983 * by the current page table page, or to the end of the 1984 * range being removed. 1985 */ 1986 if (va_next > eva) 1987 va_next = eva; 1988 1989 /* 1990 * NOTE: pmap_remove_pte() can block. 1991 */ 1992 pt_m = pmap_hold_pt_page(pde, sva); 1993 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, 1994 sva += PAGE_SIZE) { 1995 if (*pte) { 1996 if (pmap_remove_pte(pmap, pte, 0, sva)) 1997 break; 1998 } 1999 } 2000 vm_page_unhold(pt_m); 2001 } 2002 vm_object_drop(pmap->pm_pteobj); 2003 } 2004 2005 /* 2006 * Removes this physical page from all physical maps in which it resides. 2007 * Reflects back modify bits to the pager. 2008 * 2009 * This routine may not be called from an interrupt. 2010 * 2011 * No requirements. 2012 */ 2013 static void 2014 pmap_remove_all(vm_page_t m) 2015 { 2016 pt_entry_t *pte, tpte; 2017 pv_entry_t pv; 2018 vm_object_t pmobj; 2019 pmap_t pmap; 2020 2021 #if defined(PMAP_DIAGNOSTIC) 2022 /* 2023 * XXX this makes pmap_page_protect(NONE) illegal for non-managed 2024 * pages! 
2025 */ 2026 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { 2027 panic("pmap_page_protect: illegal for unmanaged page, va: 0x%08llx", (long long)VM_PAGE_TO_PHYS(m)); 2028 } 2029 #endif 2030 2031 restart: 2032 vm_page_spin_lock(m); 2033 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 2034 pmap = pv->pv_pmap; 2035 pmobj = pmap->pm_pteobj; 2036 2037 /* 2038 * Handle reversed lock ordering 2039 */ 2040 if (vm_object_hold_try(pmobj) == 0) { 2041 refcount_acquire(&pmobj->hold_count); 2042 vm_page_spin_unlock(m); 2043 vm_object_lock(pmobj); 2044 vm_page_spin_lock(m); 2045 if (pv != TAILQ_FIRST(&m->md.pv_list) || 2046 pmap != pv->pv_pmap || 2047 pmobj != pmap->pm_pteobj) { 2048 vm_page_spin_unlock(m); 2049 vm_object_drop(pmobj); 2050 goto restart; 2051 } 2052 } 2053 2054 KKASSERT(pmap->pm_stats.resident_count > 0); 2055 atomic_add_long(&pmap->pm_stats.resident_count, -1); 2056 2057 pte = pmap_pte(pmap, pv->pv_va); 2058 KKASSERT(pte != NULL); 2059 2060 tpte = pmap_inval_loadandclear(pte, pmap, pv->pv_va); 2061 if (tpte & VPTE_WIRED) 2062 atomic_add_long(&pmap->pm_stats.wired_count, -1); 2063 KKASSERT(pmap->pm_stats.wired_count >= 0); 2064 2065 if (tpte & VPTE_A) 2066 vm_page_flag_set(m, PG_REFERENCED); 2067 2068 /* 2069 * Update the vm_page_t clean and reference bits. 2070 */ 2071 if (tpte & VPTE_M) { 2072 #if defined(PMAP_DIAGNOSTIC) 2073 if (pmap_nw_modified(tpte)) { 2074 kprintf( 2075 "pmap_remove_all: modified page not writable: va: 0x%lx, pte: 0x%lx\n", 2076 pv->pv_va, tpte); 2077 } 2078 #endif 2079 pmap_track_modified(pmap, pv->pv_va); 2080 vm_page_dirty(m); 2081 } 2082 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2083 if (TAILQ_EMPTY(&m->md.pv_list)) 2084 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 2085 m->md.pv_list_count--; 2086 KKASSERT(m->md.pv_list_count >= 0); 2087 pv_entry_rb_tree_RB_REMOVE(&pmap->pm_pvroot, pv); 2088 atomic_add_int(&pmap->pm_generation, 1); 2089 vm_page_spin_unlock(m); 2090 pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem); 2091 free_pv_entry(pv); 2092 2093 vm_object_drop(pmobj); 2094 vm_page_spin_lock(m); 2095 } 2096 KKASSERT((m->flags & (PG_MAPPED|PG_WRITEABLE)) == 0); 2097 vm_page_spin_unlock(m); 2098 } 2099 2100 /* 2101 * Removes the page from a particular pmap 2102 */ 2103 void 2104 pmap_remove_specific(pmap_t pmap, vm_page_t m) 2105 { 2106 pt_entry_t *pte, tpte; 2107 pv_entry_t pv; 2108 2109 vm_object_hold(pmap->pm_pteobj); 2110 again: 2111 vm_page_spin_lock(m); 2112 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2113 if (pv->pv_pmap != pmap) 2114 continue; 2115 2116 KKASSERT(pmap->pm_stats.resident_count > 0); 2117 atomic_add_long(&pmap->pm_stats.resident_count, -1); 2118 2119 pte = pmap_pte(pmap, pv->pv_va); 2120 KKASSERT(pte != NULL); 2121 2122 tpte = pmap_inval_loadandclear(pte, pmap, pv->pv_va); 2123 if (tpte & VPTE_WIRED) 2124 atomic_add_long(&pmap->pm_stats.wired_count, -1); 2125 KKASSERT(pmap->pm_stats.wired_count >= 0); 2126 2127 if (tpte & VPTE_A) 2128 vm_page_flag_set(m, PG_REFERENCED); 2129 2130 /* 2131 * Update the vm_page_t clean and reference bits. 
2132 */ 2133 if (tpte & VPTE_M) { 2134 pmap_track_modified(pmap, pv->pv_va); 2135 vm_page_dirty(m); 2136 } 2137 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2138 pv_entry_rb_tree_RB_REMOVE(&pmap->pm_pvroot, pv); 2139 atomic_add_int(&pmap->pm_generation, 1); 2140 m->md.pv_list_count--; 2141 KKASSERT(m->md.pv_list_count >= 0); 2142 if (TAILQ_EMPTY(&m->md.pv_list)) 2143 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 2144 pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem); 2145 vm_page_spin_unlock(m); 2146 free_pv_entry(pv); 2147 goto again; 2148 } 2149 vm_page_spin_unlock(m); 2150 vm_object_drop(pmap->pm_pteobj); 2151 } 2152 2153 /* 2154 * Set the physical protection on the specified range of this map 2155 * as requested. 2156 * 2157 * This function may not be called from an interrupt if the map is 2158 * not the kernel_pmap. 2159 * 2160 * No requirements. 2161 */ 2162 void 2163 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 2164 { 2165 vm_offset_t va_next; 2166 pml4_entry_t *pml4e; 2167 pdp_entry_t *pdpe; 2168 pd_entry_t ptpaddr, *pde; 2169 pt_entry_t *pte; 2170 vm_page_t pt_m; 2171 2172 if (pmap == NULL) 2173 return; 2174 2175 if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == VM_PROT_NONE) { 2176 pmap_remove(pmap, sva, eva); 2177 return; 2178 } 2179 2180 if (prot & VM_PROT_WRITE) 2181 return; 2182 2183 vm_object_hold(pmap->pm_pteobj); 2184 2185 for (; sva < eva; sva = va_next) { 2186 pml4e = pmap_pml4e(pmap, sva); 2187 if ((*pml4e & VPTE_V) == 0) { 2188 va_next = (sva + NBPML4) & ~PML4MASK; 2189 if (va_next < sva) 2190 va_next = eva; 2191 continue; 2192 } 2193 2194 pdpe = pmap_pml4e_to_pdpe(pml4e, sva); 2195 if ((*pdpe & VPTE_V) == 0) { 2196 va_next = (sva + NBPDP) & ~PDPMASK; 2197 if (va_next < sva) 2198 va_next = eva; 2199 continue; 2200 } 2201 2202 va_next = (sva + NBPDR) & ~PDRMASK; 2203 if (va_next < sva) 2204 va_next = eva; 2205 2206 pde = pmap_pdpe_to_pde(pdpe, sva); 2207 ptpaddr = *pde; 2208 2209 #if 0 2210 /* 2211 * Check for large page. 2212 */ 2213 if ((ptpaddr & VPTE_PS) != 0) { 2214 /* JG correct? */ 2215 pmap_clean_pde(pde, pmap, sva); 2216 atomic_add_long(&pmap->pm_stats.resident_count, 2217 -NBPDR / PAGE_SIZE); 2218 continue; 2219 } 2220 #endif 2221 2222 /* 2223 * Weed out invalid mappings. Note: we assume that the page 2224 * directory table is always allocated, and in kernel virtual. 2225 */ 2226 if (ptpaddr == 0) 2227 continue; 2228 2229 if (va_next > eva) 2230 va_next = eva; 2231 2232 pt_m = pmap_hold_pt_page(pde, sva); 2233 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, 2234 sva += PAGE_SIZE) { 2235 /* 2236 * Clean managed pages and also check the accessed 2237 * bit. Just remove write perms for unmanaged 2238 * pages. Be careful of races, turning off write 2239 * access will force a fault rather then setting 2240 * the modified bit at an unexpected time. 2241 */ 2242 pmap_track_modified(pmap, sva); 2243 pmap_clean_pte(pte, pmap, sva, NULL); 2244 } 2245 vm_page_unhold(pt_m); 2246 } 2247 vm_object_drop(pmap->pm_pteobj); 2248 } 2249 2250 /* 2251 * Enter a managed page into a pmap. If the page is not wired related pmap 2252 * data can be destroyed at any time for later demand-operation. 2253 * 2254 * Insert the vm_page (m) at virtual address (v) in (pmap), with the 2255 * specified protection, and wire the mapping if requested. 2256 * 2257 * NOTE: This routine may not lazy-evaluate or lose information. The 2258 * page must actually be inserted into the given map NOW. 
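 *
 * Illustrative only (hypothetical caller, not code from this file): a
 * fault handler holding a busied vm_page_t 'm' for a user fault might
 * end up here roughly as
 *
 *	pmap_enter(vmspace_pmap(lp->lwp_vmspace), trunc_page(fault_va),
 *		   m, VM_PROT_READ, FALSE, NULL);
 *
 * with the protection and wiring taken from the governing vm_map_entry.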
2259 * 2260 * NOTE: When entering a page at a KVA address, the pmap must be the 2261 * kernel_pmap. 2262 * 2263 * No requirements. 2264 */ 2265 void 2266 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 2267 boolean_t wired, vm_map_entry_t entry __unused) 2268 { 2269 vm_paddr_t pa; 2270 pv_entry_t pv; 2271 pt_entry_t *pte; 2272 pt_entry_t origpte, newpte; 2273 vm_paddr_t opa; 2274 vm_page_t mpte; 2275 2276 if (pmap == NULL) 2277 return; 2278 2279 va = trunc_page(va); 2280 2281 vm_object_hold(pmap->pm_pteobj); 2282 2283 /* 2284 * Get the page table page. The kernel_pmap's page table pages 2285 * are preallocated and have no associated vm_page_t. 2286 * 2287 * If not NULL, mpte will be busied and we must vm_page_wakeup() 2288 * to cleanup. There will already be at least one wire count from 2289 * it being mapped into its parent. 2290 */ 2291 if (pmap == kernel_pmap) { 2292 mpte = NULL; 2293 pte = vtopte(va); 2294 } else { 2295 mpte = pmap_allocpte(pmap, va); 2296 pte = (void *)PHYS_TO_DMAP(mpte->phys_addr); 2297 pte += pmap_pte_index(va); 2298 } 2299 2300 /* 2301 * Deal with races against the kernel's real MMU by cleaning the 2302 * page, even if we are re-entering the same page. 2303 */ 2304 pa = VM_PAGE_TO_PHYS(m); 2305 origpte = pmap_inval_loadandclear(pte, pmap, va); 2306 /*origpte = pmap_clean_pte(pte, pmap, va, NULL);*/ 2307 opa = origpte & VPTE_FRAME; 2308 2309 if (origpte & VPTE_PS) 2310 panic("pmap_enter: attempted pmap_enter on 2MB page"); 2311 2312 if ((origpte & (VPTE_MANAGED|VPTE_M)) == (VPTE_MANAGED|VPTE_M)) { 2313 vm_page_t om; 2314 2315 pmap_track_modified(pmap, va); 2316 om = PHYS_TO_VM_PAGE(opa); 2317 vm_page_dirty(om); 2318 } 2319 2320 /* 2321 * Mapping has not changed, must be protection or wiring change. 2322 */ 2323 if (origpte && (opa == pa)) { 2324 /* 2325 * Wiring change, just update stats. We don't worry about 2326 * wiring PT pages as they remain resident as long as there 2327 * are valid mappings in them. Hence, if a user page is wired, 2328 * the PT page will be also. 2329 */ 2330 if (wired && ((origpte & VPTE_WIRED) == 0)) 2331 atomic_add_long(&pmap->pm_stats.wired_count, 1); 2332 else if (!wired && (origpte & VPTE_WIRED)) 2333 atomic_add_long(&pmap->pm_stats.wired_count, -1); 2334 2335 if (origpte & VPTE_MANAGED) { 2336 pa |= VPTE_MANAGED; 2337 KKASSERT(m->flags & PG_MAPPED); 2338 KKASSERT((m->flags & PG_FICTITIOUS) == 0); 2339 } else { 2340 KKASSERT((m->flags & PG_FICTITIOUS)); 2341 } 2342 vm_page_spin_lock(m); 2343 goto validate; 2344 } 2345 2346 /* 2347 * Bump the wire_count for the page table page. 2348 */ 2349 if (mpte) 2350 vm_page_wire_quick(mpte); 2351 2352 /* 2353 * Mapping has changed, invalidate old range and fall through to 2354 * handle validating new mapping. Don't inherit anything from 2355 * oldpte. 2356 */ 2357 if (opa) { 2358 int err; 2359 err = pmap_remove_pte(pmap, NULL, origpte, va); 2360 origpte = 0; 2361 if (err) 2362 panic("pmap_enter: pte vanished, va: 0x%lx", va); 2363 } 2364 2365 /* 2366 * Enter on the PV list if part of our managed memory. Note that we 2367 * raise IPL while manipulating pv_table since pmap_enter can be 2368 * called at interrupt time. 2369 */ 2370 if (pmap_initialized) { 2371 if ((m->flags & PG_FICTITIOUS) == 0) { 2372 /* 2373 * WARNING! We are using m's spin-lock as a 2374 * man's pte lock to interlock against 2375 * pmap_page_protect() operations. 2376 * 2377 * This is a bad hack (obviously). 
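			 * (The page spin lock acts as a poor man's pte
			 * lock here: it is acquired below and held across
			 * pmap_insert_entry() and the final pte swap at
			 * 'validate:', so pmap_page_protect() cannot slip
			 * in between.)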
2378 */ 2379 pv = get_pv_entry(); 2380 vm_page_spin_lock(m); 2381 pmap_insert_entry(pmap, va, mpte, m, pv); 2382 pa |= VPTE_MANAGED; 2383 /* vm_page_spin_unlock(m); */ 2384 } else { 2385 vm_page_spin_lock(m); 2386 } 2387 } else { 2388 vm_page_spin_lock(m); 2389 } 2390 2391 /* 2392 * Increment counters 2393 */ 2394 atomic_add_long(&pmap->pm_stats.resident_count, 1); 2395 if (wired) 2396 atomic_add_long(&pmap->pm_stats.wired_count, 1); 2397 2398 validate: 2399 /* 2400 * Now validate mapping with desired protection/wiring. 2401 */ 2402 newpte = (pt_entry_t)(pa | pte_prot(pmap, prot) | VPTE_V | VPTE_U); 2403 newpte |= VPTE_A; 2404 2405 if (wired) 2406 newpte |= VPTE_WIRED; 2407 // if (pmap != kernel_pmap) 2408 newpte |= VPTE_U; 2409 if (newpte & VPTE_RW) 2410 vm_page_flag_set(m, PG_WRITEABLE); 2411 KKASSERT((newpte & VPTE_MANAGED) == 0 || (m->flags & PG_MAPPED)); 2412 2413 origpte = atomic_swap_long(pte, newpte); 2414 if (origpte & VPTE_M) { 2415 kprintf("pmap [M] race @ %016jx\n", va); 2416 atomic_set_long(pte, VPTE_M); 2417 } 2418 vm_page_spin_unlock(m); 2419 2420 if (mpte) 2421 vm_page_wakeup(mpte); 2422 vm_object_drop(pmap->pm_pteobj); 2423 } 2424 2425 /* 2426 * Make a temporary mapping for a physical address. This is only intended 2427 * to be used for panic dumps. 2428 * 2429 * The caller is responsible for calling smp_invltlb(). 2430 */ 2431 void * 2432 pmap_kenter_temporary(vm_paddr_t pa, long i) 2433 { 2434 pmap_kenter_quick(crashdumpmap + (i * PAGE_SIZE), pa); 2435 return ((void *)crashdumpmap); 2436 } 2437 2438 #define MAX_INIT_PT (96) 2439 2440 /* 2441 * This routine preloads the ptes for a given object into the specified pmap. 2442 * This eliminates the blast of soft faults on process startup and 2443 * immediately after an mmap. 2444 * 2445 * No requirements. 2446 */ 2447 static int pmap_object_init_pt_callback(vm_page_t p, void *data); 2448 2449 void 2450 pmap_object_init_pt(pmap_t pmap, vm_map_entry_t entry, 2451 vm_offset_t addr, vm_size_t size, int limit) 2452 { 2453 vm_prot_t prot = entry->protection; 2454 vm_object_t object = entry->ba.object; 2455 vm_pindex_t pindex = atop(entry->ba.offset + (addr - entry->ba.start)); 2456 struct rb_vm_page_scan_info info; 2457 struct lwp *lp; 2458 vm_size_t psize; 2459 2460 /* 2461 * We can't preinit if read access isn't set or there is no pmap 2462 * or object. 2463 */ 2464 if ((prot & VM_PROT_READ) == 0 || pmap == NULL || object == NULL) 2465 return; 2466 2467 /* 2468 * We can't preinit if the pmap is not the current pmap 2469 */ 2470 lp = curthread->td_lwp; 2471 if (lp == NULL || pmap != vmspace_pmap(lp->lwp_vmspace)) 2472 return; 2473 2474 /* 2475 * Misc additional checks 2476 */ 2477 psize = x86_64_btop(size); 2478 2479 if ((object->type != OBJT_VNODE) || 2480 ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) && 2481 (object->resident_page_count > MAX_INIT_PT))) { 2482 return; 2483 } 2484 2485 if (psize + pindex > object->size) { 2486 if (object->size < pindex) 2487 return; 2488 psize = object->size - pindex; 2489 } 2490 2491 if (psize == 0) 2492 return; 2493 2494 /* 2495 * Use a red-black scan to traverse the requested range and load 2496 * any valid pages found into the pmap. 2497 * 2498 * We cannot safely scan the object's memq unless we are in a 2499 * critical section since interrupts can remove pages from objects. 
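	 *
	 * The callback below only acts on pages in the pindex range that
	 * it can busy immediately and that are fully valid; those are
	 * entered read-only, and list markers are skipped.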
2500 */ 2501 info.start_pindex = pindex; 2502 info.end_pindex = pindex + psize - 1; 2503 info.limit = limit; 2504 info.mpte = NULL; 2505 info.addr = addr; 2506 info.pmap = pmap; 2507 info.entry = entry; 2508 2509 vm_object_hold_shared(object); 2510 vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp, 2511 pmap_object_init_pt_callback, &info); 2512 vm_object_drop(object); 2513 } 2514 2515 static 2516 int 2517 pmap_object_init_pt_callback(vm_page_t p, void *data) 2518 { 2519 struct rb_vm_page_scan_info *info = data; 2520 vm_pindex_t rel_index; 2521 /* 2522 * don't allow an madvise to blow away our really 2523 * free pages allocating pv entries. 2524 */ 2525 if ((info->limit & MAP_PREFAULT_MADVISE) && 2526 vmstats.v_free_count < vmstats.v_free_reserved) { 2527 return(-1); 2528 } 2529 2530 /* 2531 * Ignore list markers and ignore pages we cannot instantly 2532 * busy (while holding the object token). 2533 */ 2534 if (p->flags & PG_MARKER) 2535 return 0; 2536 if (vm_page_busy_try(p, TRUE)) 2537 return 0; 2538 if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && 2539 (p->flags & PG_FICTITIOUS) == 0) { 2540 if ((p->queue - p->pc) == PQ_CACHE) 2541 vm_page_deactivate(p); 2542 rel_index = p->pindex - info->start_pindex; 2543 pmap_enter(info->pmap, info->addr + x86_64_ptob(rel_index), p, 2544 VM_PROT_READ, FALSE, info->entry); 2545 } 2546 vm_page_wakeup(p); 2547 return(0); 2548 } 2549 2550 /* 2551 * Return TRUE if the pmap is in shape to trivially 2552 * pre-fault the specified address. 2553 * 2554 * Returns FALSE if it would be non-trivial or if a 2555 * pte is already loaded into the slot. 2556 * 2557 * No requirements. 2558 */ 2559 int 2560 pmap_prefault_ok(pmap_t pmap, vm_offset_t addr) 2561 { 2562 pt_entry_t *pte; 2563 pd_entry_t *pde; 2564 int ret; 2565 2566 vm_object_hold(pmap->pm_pteobj); 2567 pde = pmap_pde(pmap, addr); 2568 if (pde == NULL || *pde == 0) { 2569 ret = 0; 2570 } else { 2571 pte = pmap_pde_to_pte(pde, addr); 2572 ret = (*pte) ? 0 : 1; 2573 } 2574 vm_object_drop(pmap->pm_pteobj); 2575 2576 return (ret); 2577 } 2578 2579 /* 2580 * Change the wiring attribute for a map/virtual-address pair. 2581 * 2582 * The mapping must already exist in the pmap. 2583 * No other requirements. 2584 */ 2585 vm_page_t 2586 pmap_unwire(pmap_t pmap, vm_offset_t va) 2587 { 2588 pt_entry_t *pte; 2589 vm_paddr_t pa; 2590 vm_page_t m; 2591 2592 if (pmap == NULL) 2593 return NULL; 2594 2595 vm_object_hold(pmap->pm_pteobj); 2596 pte = pmap_pte(pmap, va); 2597 2598 if (pte == NULL || (*pte & VPTE_V) == 0) { 2599 vm_object_drop(pmap->pm_pteobj); 2600 return NULL; 2601 } 2602 2603 /* 2604 * Wiring is not a hardware characteristic so there is no need to 2605 * invalidate TLB. However, in an SMP environment we must use 2606 * a locked bus cycle to update the pte (if we are not using 2607 * the pmap_inval_*() API that is)... it's ok to do this for simple 2608 * wiring changes. 2609 */ 2610 if (pmap_pte_w(pte)) 2611 atomic_add_long(&pmap->pm_stats.wired_count, -1); 2612 /* XXX else return NULL so caller doesn't unwire m ? */ 2613 atomic_clear_long(pte, VPTE_WIRED); 2614 2615 pa = *pte & VPTE_FRAME; 2616 m = PHYS_TO_VM_PAGE(pa); /* held by wired count */ 2617 2618 vm_object_drop(pmap->pm_pteobj); 2619 2620 return m; 2621 } 2622 2623 /* 2624 * Copy the range specified by src_addr/len 2625 * from the source map to the range dst_addr/len 2626 * in the destination map. 2627 * 2628 * This routine is only advisory and need not do anything. 
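 *
 * (In this vkernel pmap it is a deliberate no-op; see the FIXME in the
 * body below.)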
 */
void
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
	  vm_size_t len, vm_offset_t src_addr)
{
	/*
	 * XXX BUGGY.  Among other things srcmpte is assumed to remain
	 * valid through blocking calls, and that's just not going to
	 * be the case.
	 *
	 * FIXME!
	 */
	return;
}

/*
 * pmap_zero_page:
 *
 *	Zero the specified physical page.
 *
 *	This function may be called from an interrupt and no locking is
 *	required.
 */
void
pmap_zero_page(vm_paddr_t phys)
{
	vm_offset_t va = PHYS_TO_DMAP(phys);

	bzero((void *)va, PAGE_SIZE);
}

/*
 * pmap_zero_page_area:
 *
 *	Zero part of a physical page by mapping it into memory and clearing
 *	its contents with bzero.
 *
 *	off and size may not cover an area beyond a single hardware page.
 */
void
pmap_zero_page_area(vm_paddr_t phys, int off, int size)
{
	vm_offset_t virt = PHYS_TO_DMAP(phys);

	bzero((char *)virt + off, size);
}

/*
 * pmap_copy_page:
 *
 *	Copy the physical page from the source PA to the target PA.
 *	This function may be called from an interrupt.  No locking
 *	is required.
 */
void
pmap_copy_page(vm_paddr_t src, vm_paddr_t dst)
{
	vm_offset_t src_virt, dst_virt;

	src_virt = PHYS_TO_DMAP(src);
	dst_virt = PHYS_TO_DMAP(dst);
	bcopy((void *)src_virt, (void *)dst_virt, PAGE_SIZE);
}

/*
 * pmap_copy_page_frag:
 *
 *	Copy a fragment of a physical page from the source PA to the
 *	target PA.  This function may be called from an interrupt.  No
 *	locking is required.
 */
void
pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes)
{
	vm_offset_t src_virt, dst_virt;

	src_virt = PHYS_TO_DMAP(src);
	dst_virt = PHYS_TO_DMAP(dst);
	bcopy((char *)src_virt + (src & PAGE_MASK),
	      (char *)dst_virt + (dst & PAGE_MASK),
	      bytes);
}

/*
 * Remove all pages from the specified address space; this aids process
 * exit speeds.  Also, this code is special cased for the current
 * process only, but can have the more generic (and slightly slower)
 * mode enabled.  This is much faster than pmap_remove in the case
 * of running down an entire address space.
 *
 * No other requirements.
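 *
 * Illustrative only (hypothetical caller, not code from this file):
 * running down an exiting process's entire user address space would
 * look roughly like
 *
 *	pmap_remove_pages(vmspace_pmap(vm), VM_MIN_USER_ADDRESS,
 *			  VM_MAX_USER_ADDRESS);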
2720 */ 2721 void 2722 pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 2723 { 2724 pmap_remove(pmap, sva, eva); 2725 #if 0 2726 pt_entry_t *pte, tpte; 2727 pv_entry_t pv, npv; 2728 vm_page_t m; 2729 int save_generation; 2730 2731 if (pmap->pm_pteobj) 2732 vm_object_hold(pmap->pm_pteobj); 2733 2734 pmap_invalidate_range(pmap, sva, eva); 2735 2736 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { 2737 if (pv->pv_va >= eva || pv->pv_va < sva) { 2738 npv = TAILQ_NEXT(pv, pv_plist); 2739 continue; 2740 } 2741 2742 KKASSERT(pmap == pv->pv_pmap); 2743 2744 pte = pmap_pte(pmap, pv->pv_va); 2745 2746 /* 2747 * We cannot remove wired pages from a process' mapping 2748 * at this time 2749 */ 2750 if (*pte & VPTE_WIRED) { 2751 npv = TAILQ_NEXT(pv, pv_plist); 2752 continue; 2753 } 2754 tpte = pmap_inval_loadandclear(pte, pmap, pv->pv_va); 2755 2756 m = PHYS_TO_VM_PAGE(tpte & VPTE_FRAME); 2757 vm_page_spin_lock(m); 2758 2759 KASSERT(m < &vm_page_array[vm_page_array_size], 2760 ("pmap_remove_pages: bad tpte %lx", tpte)); 2761 2762 KKASSERT(pmap->pm_stats.resident_count > 0); 2763 atomic_add_long(&pmap->pm_stats.resident_count, -1); 2764 2765 /* 2766 * Update the vm_page_t clean and reference bits. 2767 */ 2768 if (tpte & VPTE_M) { 2769 vm_page_dirty(m); 2770 } 2771 2772 npv = TAILQ_NEXT(pv, pv_plist); 2773 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 2774 atomic_add_int(&pmap->pm_generation, 1); 2775 save_generation = pmap->pm_generation; 2776 m->md.pv_list_count--; 2777 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2778 if (TAILQ_EMPTY(&m->md.pv_list)) 2779 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 2780 vm_page_spin_unlock(m); 2781 2782 pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem); 2783 free_pv_entry(pv); 2784 2785 /* 2786 * Restart the scan if we blocked during the unuse or free 2787 * calls and other removals were made. 2788 */ 2789 if (save_generation != pmap->pm_generation) { 2790 kprintf("Warning: pmap_remove_pages race-A avoided\n"); 2791 npv = TAILQ_FIRST(&pmap->pm_pvlist); 2792 } 2793 } 2794 if (pmap->pm_pteobj) 2795 vm_object_drop(pmap->pm_pteobj); 2796 pmap_remove(pmap, sva, eva); 2797 #endif 2798 } 2799 2800 /* 2801 * pmap_testbit tests bits in active mappings of a VM page. 2802 */ 2803 static boolean_t 2804 pmap_testbit(vm_page_t m, int bit) 2805 { 2806 pv_entry_t pv; 2807 pt_entry_t *pte; 2808 2809 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 2810 return FALSE; 2811 2812 if (TAILQ_FIRST(&m->md.pv_list) == NULL) 2813 return FALSE; 2814 2815 vm_page_spin_lock(m); 2816 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2817 /* 2818 * if the bit being tested is the modified bit, then 2819 * mark clean_map and ptes as never 2820 * modified. 2821 */ 2822 if (bit & (VPTE_A|VPTE_M)) 2823 pmap_track_modified(pv->pv_pmap, pv->pv_va); 2824 2825 #if defined(PMAP_DIAGNOSTIC) 2826 if (pv->pv_pmap == NULL) { 2827 kprintf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va); 2828 continue; 2829 } 2830 #endif 2831 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2832 if (*pte & bit) { 2833 vm_page_spin_unlock(m); 2834 return TRUE; 2835 } 2836 } 2837 vm_page_spin_unlock(m); 2838 return (FALSE); 2839 } 2840 2841 /* 2842 * This routine is used to clear bits in ptes. Certain bits require special 2843 * handling, in particular (on virtual kernels) the VPTE_M (modify) bit. 2844 * 2845 * This routine is only called with certain VPTE_* bit combinations. 
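 *
 * Roughly: VPTE_RW write-protects the mapping and synchronizes/clears
 * VPTE_M, VPTE_M alone is cleared together with a real-kernel pte
 * invalidation so a fresh fault-on-write can re-set it, other soft bits
 * (e.g. VPTE_A) are cleared with a plain atomic op, and the combined
 * VPTE_RW|VPTE_M case is trapped with a panic.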
 */
static __inline void
pmap_clearbit(vm_page_t m, int bit)
{
	pv_entry_t pv;
	pt_entry_t *pte;
	pt_entry_t pbits;
	vm_object_t pmobj;
	pmap_t pmap;

	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
		if (bit == VPTE_RW)
			vm_page_flag_clear(m, PG_WRITEABLE);
		return;
	}

	/*
	 * Loop over all current mappings, setting/clearing as appropriate.
	 * If setting RO do we need to clear the VAC?
	 */
restart:
	vm_page_spin_lock(m);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		/*
		 * Need the pmap object lock(?)
		 */
		pmap = pv->pv_pmap;
		pmobj = pmap->pm_pteobj;

		if (vm_object_hold_try(pmobj) == 0) {
			refcount_acquire(&pmobj->hold_count);
			vm_page_spin_unlock(m);
			vm_object_lock(pmobj);
			vm_object_drop(pmobj);
			goto restart;
		}

		/*
		 * Don't write-protect pager mappings.
		 */
		if (bit == VPTE_RW) {
			pmap_track_modified(pv->pv_pmap, pv->pv_va);
		}

#if defined(PMAP_DIAGNOSTIC)
		if (pv->pv_pmap == NULL) {
			kprintf("Null pmap (cb) at va: 0x%lx\n", pv->pv_va);
			vm_object_drop(pmobj);
			continue;
		}
#endif

		/*
		 * Careful here.  We can use a locked bus instruction to
		 * clear VPTE_A or VPTE_M safely but we need to synchronize
		 * with the target cpus when we mess with VPTE_RW.
		 *
		 * On virtual kernels we must force a new fault-on-write
		 * in the real kernel if we clear the Modify bit ourselves,
		 * otherwise the real kernel will not get a new fault and
		 * will never set our Modify bit again.
		 */
		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
		if (*pte & bit) {
			if (bit == VPTE_RW) {
				/*
				 * We must also clear VPTE_M when clearing
				 * VPTE_RW and synchronize its state to
				 * the page.
				 */
				pmap_track_modified(pv->pv_pmap, pv->pv_va);
				pbits = pmap_clean_pte(pte, pv->pv_pmap,
						       pv->pv_va, m);
			} else if (bit == VPTE_M) {
				/*
				 * We must invalidate the real-kernel pte
				 * when clearing the VPTE_M bit to force the
				 * real kernel to take a new fault to re-set
				 * VPTE_M.
				 */
				atomic_clear_long(pte, VPTE_M);
				if (*pte & VPTE_RW) {
					pmap_invalidate_range(pv->pv_pmap,
						      pv->pv_va,
						      pv->pv_va + PAGE_SIZE);
				}
			} else if ((bit & (VPTE_RW|VPTE_M)) ==
				   (VPTE_RW|VPTE_M)) {
				/*
				 * We've been asked to clear W & M; the
				 * caller doesn't want us to update the
				 * dirty status of the VM page.
				 */
				pmap_track_modified(pv->pv_pmap, pv->pv_va);
				pmap_clean_pte(pte, pv->pv_pmap, pv->pv_va, m);
				panic("shouldn't be called");
			} else {
				/*
				 * We've been asked to clear bits that do
				 * not interact with hardware.
				 */
				atomic_clear_long(pte, bit);
			}
		}
		vm_object_drop(pmobj);
	}
	if (bit == VPTE_RW)
		vm_page_flag_clear(m, PG_WRITEABLE);
	vm_page_spin_unlock(m);
}

/*
 * Lower the permission for all mappings to a given page.
 *
 * No other requirements.
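 *
 * For example, pmap_page_protect(m, VM_PROT_READ) write-protects the
 * page in every pmap that maps it, while pmap_page_protect(m,
 * VM_PROT_NONE) removes all of its mappings.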
2961 */ 2962 void 2963 pmap_page_protect(vm_page_t m, vm_prot_t prot) 2964 { 2965 if ((prot & VM_PROT_WRITE) == 0) { 2966 if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { 2967 pmap_clearbit(m, VPTE_RW); 2968 } else { 2969 pmap_remove_all(m); 2970 } 2971 } 2972 } 2973 2974 vm_paddr_t 2975 pmap_phys_address(vm_pindex_t ppn) 2976 { 2977 return (x86_64_ptob(ppn)); 2978 } 2979 2980 /* 2981 * Return a count of reference bits for a page, clearing those bits. 2982 * It is not necessary for every reference bit to be cleared, but it 2983 * is necessary that 0 only be returned when there are truly no 2984 * reference bits set. 2985 * 2986 * XXX: The exact number of bits to check and clear is a matter that 2987 * should be tested and standardized at some point in the future for 2988 * optimal aging of shared pages. 2989 * 2990 * No other requirements. 2991 */ 2992 int 2993 pmap_ts_referenced(vm_page_t m) 2994 { 2995 pv_entry_t pv, pvf, pvn; 2996 pt_entry_t *pte; 2997 int rtval = 0; 2998 2999 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 3000 return (rtval); 3001 3002 vm_page_spin_lock(m); 3003 if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 3004 pvf = pv; 3005 do { 3006 pvn = TAILQ_NEXT(pv, pv_list); 3007 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 3008 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 3009 3010 pmap_track_modified(pv->pv_pmap, pv->pv_va); 3011 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 3012 3013 if (pte && (*pte & VPTE_A)) { 3014 atomic_clear_long(pte, VPTE_A); 3015 rtval++; 3016 if (rtval > 4) { 3017 break; 3018 } 3019 } 3020 } while ((pv = pvn) != NULL && pv != pvf); 3021 } 3022 vm_page_spin_unlock(m); 3023 3024 return (rtval); 3025 } 3026 3027 /* 3028 * Return whether or not the specified physical page was modified 3029 * in any physical maps. 3030 * 3031 * No other requirements. 3032 */ 3033 boolean_t 3034 pmap_is_modified(vm_page_t m) 3035 { 3036 boolean_t res; 3037 3038 res = pmap_testbit(m, VPTE_M); 3039 3040 return (res); 3041 } 3042 3043 /* 3044 * Clear the modify bits on the specified physical page. For the vkernel 3045 * we really need to clean the page, which clears VPTE_RW and VPTE_M, in 3046 * order to ensure that we take a fault on the next write to the page. 3047 * Otherwise the page may become dirty without us knowing it. 3048 * 3049 * No other requirements. 3050 */ 3051 void 3052 pmap_clear_modify(vm_page_t m) 3053 { 3054 pmap_clearbit(m, VPTE_RW); 3055 } 3056 3057 /* 3058 * Clear the reference bit on the specified physical page. 3059 * 3060 * No other requirements. 3061 */ 3062 void 3063 pmap_clear_reference(vm_page_t m) 3064 { 3065 pmap_clearbit(m, VPTE_A); 3066 } 3067 3068 /* 3069 * Miscellaneous support routines follow 3070 */ 3071 static void 3072 x86_64_protection_init(void) 3073 { 3074 uint64_t *kp; 3075 int prot; 3076 3077 kp = protection_codes; 3078 for (prot = 0; prot < 8; prot++) { 3079 if (prot & VM_PROT_READ) 3080 *kp |= 0; /* R */ 3081 if (prot & VM_PROT_WRITE) 3082 *kp |= VPTE_RW; /* R+W */ 3083 if (prot && (prot & VM_PROT_EXECUTE) == 0) 3084 *kp |= VPTE_NX; /* NX - !executable */ 3085 ++kp; 3086 } 3087 } 3088 3089 /* 3090 * Sets the memory attribute for the specified page. 3091 */ 3092 void 3093 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) 3094 { 3095 /* This is a vkernel, do nothing */ 3096 } 3097 3098 /* 3099 * Change the PAT attribute on an existing kernel memory map. Caller 3100 * must ensure that the virtual memory in question is not accessed 3101 * during the adjustment. 
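 *
 * (On the vkernel this and pmap_page_set_memattr() are intentional
 * no-ops; the real mappings, and hence the effective memory
 * attributes, belong to the host kernel.)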
3102 */ 3103 void 3104 pmap_change_attr(vm_offset_t va, vm_size_t count, int mode) 3105 { 3106 /* This is a vkernel, do nothing */ 3107 } 3108 3109 /* 3110 * Perform the pmap work for mincore 3111 * 3112 * No other requirements. 3113 */ 3114 int 3115 pmap_mincore(pmap_t pmap, vm_offset_t addr) 3116 { 3117 pt_entry_t *ptep, pte; 3118 vm_page_t m; 3119 int val = 0; 3120 3121 vm_object_hold(pmap->pm_pteobj); 3122 ptep = pmap_pte(pmap, addr); 3123 3124 if (ptep && (pte = *ptep) != 0) { 3125 vm_paddr_t pa; 3126 3127 val = MINCORE_INCORE; 3128 if ((pte & VPTE_MANAGED) == 0) 3129 goto done; 3130 3131 pa = pte & VPTE_FRAME; 3132 3133 m = PHYS_TO_VM_PAGE(pa); 3134 3135 /* 3136 * Modified by us 3137 */ 3138 if (pte & VPTE_M) 3139 val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; 3140 /* 3141 * Modified by someone 3142 */ 3143 else if (m->dirty || pmap_is_modified(m)) 3144 val |= MINCORE_MODIFIED_OTHER; 3145 /* 3146 * Referenced by us 3147 */ 3148 if (pte & VPTE_A) 3149 val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; 3150 3151 /* 3152 * Referenced by someone 3153 */ 3154 else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) { 3155 val |= MINCORE_REFERENCED_OTHER; 3156 vm_page_flag_set(m, PG_REFERENCED); 3157 } 3158 } 3159 done: 3160 vm_object_drop(pmap->pm_pteobj); 3161 3162 return val; 3163 } 3164 3165 /* 3166 * Replace p->p_vmspace with a new one. If adjrefs is non-zero the new 3167 * vmspace will be ref'd and the old one will be deref'd. 3168 * 3169 * Caller must hold vmspace->vm_map.token for oldvm and newvm 3170 */ 3171 void 3172 pmap_replacevm(struct proc *p, struct vmspace *newvm, int adjrefs) 3173 { 3174 struct vmspace *oldvm; 3175 struct lwp *lp; 3176 3177 oldvm = p->p_vmspace; 3178 if (oldvm != newvm) { 3179 if (adjrefs) 3180 vmspace_ref(newvm); 3181 KKASSERT((newvm->vm_refcnt & VM_REF_DELETED) == 0); 3182 p->p_vmspace = newvm; 3183 KKASSERT(p->p_nthreads == 1); 3184 lp = RB_ROOT(&p->p_lwp_tree); 3185 pmap_setlwpvm(lp, newvm); 3186 if (adjrefs) 3187 vmspace_rel(oldvm); 3188 } 3189 } 3190 3191 /* 3192 * Set the vmspace for a LWP. The vmspace is almost universally set the 3193 * same as the process vmspace, but virtual kernels need to swap out contexts 3194 * on a per-lwp basis. 3195 */ 3196 void 3197 pmap_setlwpvm(struct lwp *lp, struct vmspace *newvm) 3198 { 3199 struct vmspace *oldvm; 3200 struct pmap *pmap; 3201 3202 oldvm = lp->lwp_vmspace; 3203 if (oldvm != newvm) { 3204 crit_enter(); 3205 KKASSERT((newvm->vm_refcnt & VM_REF_DELETED) == 0); 3206 lp->lwp_vmspace = newvm; 3207 if (curthread->td_lwp == lp) { 3208 pmap = vmspace_pmap(newvm); 3209 ATOMIC_CPUMASK_ORBIT(pmap->pm_active, mycpu->gd_cpuid); 3210 if (pmap->pm_active_lock & CPULOCK_EXCL) 3211 pmap_interlock_wait(newvm); 3212 #if defined(SWTCH_OPTIM_STATS) 3213 tlb_flush_count++; 3214 #endif 3215 pmap = vmspace_pmap(oldvm); 3216 ATOMIC_CPUMASK_NANDBIT(pmap->pm_active, 3217 mycpu->gd_cpuid); 3218 } 3219 crit_exit(); 3220 } 3221 } 3222 3223 /* 3224 * The swtch code tried to switch in a heavy weight process whos pmap 3225 * is locked by another cpu. We have to wait for the lock to clear before 3226 * the pmap can be used. 
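 *
 * The loop below simply spins, calling vkernel_yield() on each
 * iteration so the vkernel does not burn host cpu while the other
 * cpu finishes with the pmap.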
3227 */ 3228 void 3229 pmap_interlock_wait (struct vmspace *vm) 3230 { 3231 pmap_t pmap = vmspace_pmap(vm); 3232 3233 if (pmap->pm_active_lock & CPULOCK_EXCL) { 3234 crit_enter(); 3235 while (pmap->pm_active_lock & CPULOCK_EXCL) { 3236 cpu_ccfence(); 3237 vkernel_yield(); 3238 } 3239 crit_exit(); 3240 } 3241 } 3242 3243 vm_offset_t 3244 pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) 3245 { 3246 3247 if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { 3248 return addr; 3249 } 3250 3251 addr = roundup2(addr, NBPDR); 3252 return addr; 3253 } 3254 3255 /* 3256 * Used by kmalloc/kfree, page already exists at va 3257 */ 3258 vm_page_t 3259 pmap_kvtom(vm_offset_t va) 3260 { 3261 vpte_t *ptep; 3262 3263 KKASSERT(va >= KvaStart && va < KvaEnd); 3264 ptep = vtopte(va); 3265 return(PHYS_TO_VM_PAGE(*ptep & PG_FRAME)); 3266 } 3267 3268 void 3269 pmap_object_init(vm_object_t object) 3270 { 3271 /* empty */ 3272 } 3273 3274 void 3275 pmap_object_free(vm_object_t object) 3276 { 3277 /* empty */ 3278 } 3279 3280 void 3281 pmap_pgscan(struct pmap_pgscan_info *pginfo) 3282 { 3283 pmap_t pmap = pginfo->pmap; 3284 vm_offset_t sva = pginfo->beg_addr; 3285 vm_offset_t eva = pginfo->end_addr; 3286 vm_offset_t va_next; 3287 pml4_entry_t *pml4e; 3288 pdp_entry_t *pdpe; 3289 pd_entry_t ptpaddr, *pde; 3290 pt_entry_t *pte; 3291 vm_page_t pt_m; 3292 int stop = 0; 3293 3294 vm_object_hold(pmap->pm_pteobj); 3295 3296 for (; sva < eva; sva = va_next) { 3297 if (stop) 3298 break; 3299 3300 pml4e = pmap_pml4e(pmap, sva); 3301 if ((*pml4e & VPTE_V) == 0) { 3302 va_next = (sva + NBPML4) & ~PML4MASK; 3303 if (va_next < sva) 3304 va_next = eva; 3305 continue; 3306 } 3307 3308 pdpe = pmap_pml4e_to_pdpe(pml4e, sva); 3309 if ((*pdpe & VPTE_V) == 0) { 3310 va_next = (sva + NBPDP) & ~PDPMASK; 3311 if (va_next < sva) 3312 va_next = eva; 3313 continue; 3314 } 3315 3316 va_next = (sva + NBPDR) & ~PDRMASK; 3317 if (va_next < sva) 3318 va_next = eva; 3319 3320 pde = pmap_pdpe_to_pde(pdpe, sva); 3321 ptpaddr = *pde; 3322 3323 #if 0 3324 /* 3325 * Check for large page (ignore). 3326 */ 3327 if ((ptpaddr & VPTE_PS) != 0) { 3328 #if 0 3329 pmap_clean_pde(pde, pmap, sva); 3330 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 3331 #endif 3332 continue; 3333 } 3334 #endif 3335 3336 /* 3337 * Weed out invalid mappings. Note: we assume that the page 3338 * directory table is always allocated, and in kernel virtual. 3339 */ 3340 if (ptpaddr == 0) 3341 continue; 3342 3343 if (va_next > eva) 3344 va_next = eva; 3345 3346 pt_m = pmap_hold_pt_page(pde, sva); 3347 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, 3348 sva += PAGE_SIZE) { 3349 vm_page_t m; 3350 3351 if (stop) 3352 break; 3353 if ((*pte & VPTE_MANAGED) == 0) 3354 continue; 3355 3356 m = PHYS_TO_VM_PAGE(*pte & VPTE_FRAME); 3357 if (vm_page_busy_try(m, TRUE) == 0) { 3358 if (pginfo->callback(pginfo, sva, m) < 0) 3359 stop = 1; 3360 } 3361 } 3362 vm_page_unhold(pt_m); 3363 } 3364 vm_object_drop(pmap->pm_pteobj); 3365 } 3366 3367 void 3368 pmap_maybethreaded(pmap_t pmap) 3369 { 3370 /* nop */ 3371 } 3372 3373 /* 3374 * Called while page is hard-busied to clear the PG_MAPPED and PG_WRITEABLE 3375 * flags if able. 3376 * 3377 * vkernel code is using the old pmap style so the flags should already 3378 * be properly set. 3379 */ 3380 int 3381 pmap_mapped_sync(vm_page_t m) 3382 { 3383 return (m->flags); 3384 } 3385
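
/*
 * Illustrative sketch only (not compiled; the callback name is
 * hypothetical): pmap_pgscan() above hard-busies each managed page it
 * finds and hands it to pginfo->callback(), which stops the scan by
 * returning a negative value.
 */
#if 0
static int
example_pgscan_callback(struct pmap_pgscan_info *pginfo, vm_offset_t va,
			vm_page_t m)
{
	/*
	 * pmap_pgscan() does not wake the busied page up itself, so a
	 * real callback would be responsible for releasing it.
	 */
	vm_page_wakeup(m);
	return 0;		/* a negative return aborts the scan */
}
#endif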