1 /* 2 * Copyright (c) 1991 Regents of the University of California. 3 * Copyright (c) 1994 John S. Dyson 4 * Copyright (c) 1994 David Greenman 5 * Copyright (c) 2003 Peter Wemm 6 * Copyright (c) 2005-2008 Alan L. Cox <alc@cs.rice.edu> 7 * Copyright (c) 2008, 2009 The DragonFly Project. 8 * Copyright (c) 2008, 2009 Jordan Gordeev. 9 * All rights reserved. 10 * 11 * This code is derived from software contributed to Berkeley by 12 * the Systems Programming Group of the University of Utah Computer 13 * Science Department and William Jolitz of UUNET Technologies Inc. 14 * 15 * Redistribution and use in source and binary forms, with or without 16 * modification, are permitted provided that the following conditions 17 * are met: 18 * 1. Redistributions of source code must retain the above copyright 19 * notice, this list of conditions and the following disclaimer. 20 * 2. Redistributions in binary form must reproduce the above copyright 21 * notice, this list of conditions and the following disclaimer in the 22 * documentation and/or other materials provided with the distribution. 23 * 3. All advertising materials mentioning features or use of this software 24 * must display the following acknowledgement: 25 * This product includes software developed by the University of 26 * California, Berkeley and its contributors. 27 * 4. Neither the name of the University nor the names of its contributors 28 * may be used to endorse or promote products derived from this software 29 * without specific prior written permission. 30 * 31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 34 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 41 * SUCH DAMAGE. 42 * 43 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 44 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $ 45 */ 46 47 /* 48 * Manages physical address maps. 
49 */ 50 51 #include "opt_msgbuf.h" 52 53 #include <sys/param.h> 54 #include <sys/systm.h> 55 #include <sys/kernel.h> 56 #include <sys/proc.h> 57 #include <sys/msgbuf.h> 58 #include <sys/vmmeter.h> 59 #include <sys/mman.h> 60 #include <sys/vmspace.h> 61 62 #include <vm/vm.h> 63 #include <vm/vm_param.h> 64 #include <sys/sysctl.h> 65 #include <sys/lock.h> 66 #include <vm/vm_kern.h> 67 #include <vm/vm_page.h> 68 #include <vm/vm_map.h> 69 #include <vm/vm_object.h> 70 #include <vm/vm_extern.h> 71 #include <vm/vm_pageout.h> 72 #include <vm/vm_pager.h> 73 #include <vm/vm_zone.h> 74 75 #include <sys/user.h> 76 #include <sys/thread2.h> 77 #include <sys/sysref2.h> 78 #include <sys/spinlock2.h> 79 #include <vm/vm_page2.h> 80 81 #include <machine/cputypes.h> 82 #include <machine/md_var.h> 83 #include <machine/specialreg.h> 84 #include <machine/smp.h> 85 #include <machine/globaldata.h> 86 #include <machine/pmap.h> 87 #include <machine/pmap_inval.h> 88 89 #include <ddb/ddb.h> 90 91 #include <stdio.h> 92 #include <assert.h> 93 #include <stdlib.h> 94 #include <pthread.h> 95 96 #define PMAP_KEEP_PDIRS 97 #ifndef PMAP_SHPGPERPROC 98 #define PMAP_SHPGPERPROC 1000 99 #endif 100 101 #if defined(DIAGNOSTIC) 102 #define PMAP_DIAGNOSTIC 103 #endif 104 105 #define MINPV 2048 106 107 #if !defined(PMAP_DIAGNOSTIC) 108 #define PMAP_INLINE __inline 109 #else 110 #define PMAP_INLINE 111 #endif 112 113 /* 114 * Get PDEs and PTEs for user/kernel address space 115 */ 116 static pd_entry_t *pmap_pde(pmap_t pmap, vm_offset_t va); 117 #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) 118 119 #define pmap_pde_v(pte) ((*(pd_entry_t *)pte & VPTE_V) != 0) 120 #define pmap_pte_w(pte) ((*(pt_entry_t *)pte & VPTE_WIRED) != 0) 121 #define pmap_pte_m(pte) ((*(pt_entry_t *)pte & VPTE_M) != 0) 122 #define pmap_pte_u(pte) ((*(pt_entry_t *)pte & VPTE_A) != 0) 123 #define pmap_pte_v(pte) ((*(pt_entry_t *)pte & VPTE_V) != 0) 124 125 /* 126 * Given a map and a machine independent protection code, 127 * convert to a vax protection code. 128 */ 129 #define pte_prot(m, p) \ 130 (protection_codes[p & (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)]) 131 static uint64_t protection_codes[8]; 132 133 struct pmap kernel_pmap; 134 135 static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? 
*/ 136 137 static struct vm_object kptobj; 138 static int nkpt; 139 140 static uint64_t KPDphys; /* phys addr of kernel level 2 */ 141 uint64_t KPDPphys; /* phys addr of kernel level 3 */ 142 uint64_t KPML4phys; /* phys addr of kernel level 4 */ 143 144 extern int vmm_enabled; 145 extern void *vkernel_stack; 146 147 /* 148 * Data for the pv entry allocation mechanism 149 */ 150 static vm_zone_t pvzone; 151 static struct vm_zone pvzone_store; 152 static int pv_entry_count = 0; 153 static int pv_entry_max = 0; 154 static int pv_entry_high_water = 0; 155 static int pmap_pagedaemon_waken = 0; 156 static struct pv_entry *pvinit; 157 158 /* 159 * All those kernel PT submaps that BSD is so fond of 160 */ 161 pt_entry_t *CMAP1 = NULL, *ptmmap; 162 caddr_t CADDR1 = NULL; 163 static pt_entry_t *msgbufmap; 164 165 uint64_t KPTphys; 166 167 static PMAP_INLINE void free_pv_entry (pv_entry_t pv); 168 static pv_entry_t get_pv_entry (void); 169 static void i386_protection_init (void); 170 static __inline void pmap_clearbit (vm_page_t m, int bit); 171 172 static void pmap_remove_all (vm_page_t m); 173 static int pmap_remove_pte (struct pmap *pmap, pt_entry_t *ptq, 174 pt_entry_t oldpte, vm_offset_t sva); 175 static void pmap_remove_page (struct pmap *pmap, vm_offset_t va); 176 static int pmap_remove_entry (struct pmap *pmap, vm_page_t m, 177 vm_offset_t va); 178 static boolean_t pmap_testbit (vm_page_t m, int bit); 179 static void pmap_insert_entry (pmap_t pmap, vm_offset_t va, 180 vm_page_t mpte, vm_page_t m, pv_entry_t); 181 182 static vm_page_t pmap_allocpte (pmap_t pmap, vm_offset_t va); 183 184 static int pmap_release_free_page (pmap_t pmap, vm_page_t p); 185 static vm_page_t _pmap_allocpte (pmap_t pmap, vm_pindex_t ptepindex); 186 static vm_page_t pmap_page_lookup (vm_object_t object, vm_pindex_t pindex); 187 static int pmap_unuse_pt (pmap_t, vm_offset_t, vm_page_t); 188 189 static int 190 pv_entry_compare(pv_entry_t pv1, pv_entry_t pv2) 191 { 192 if (pv1->pv_va < pv2->pv_va) 193 return(-1); 194 if (pv1->pv_va > pv2->pv_va) 195 return(1); 196 return(0); 197 } 198 199 RB_GENERATE2(pv_entry_rb_tree, pv_entry, pv_entry, 200 pv_entry_compare, vm_offset_t, pv_va); 201 202 static __inline vm_pindex_t 203 pmap_pt_pindex(vm_offset_t va) 204 { 205 return va >> PDRSHIFT; 206 } 207 208 static __inline vm_pindex_t 209 pmap_pte_index(vm_offset_t va) 210 { 211 return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1)); 212 } 213 214 static __inline vm_pindex_t 215 pmap_pde_index(vm_offset_t va) 216 { 217 return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1)); 218 } 219 220 static __inline vm_pindex_t 221 pmap_pdpe_index(vm_offset_t va) 222 { 223 return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1)); 224 } 225 226 static __inline vm_pindex_t 227 pmap_pml4e_index(vm_offset_t va) 228 { 229 return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1)); 230 } 231 232 /* Return a pointer to the PML4 slot that corresponds to a VA */ 233 static __inline pml4_entry_t * 234 pmap_pml4e(pmap_t pmap, vm_offset_t va) 235 { 236 return (&pmap->pm_pml4[pmap_pml4e_index(va)]); 237 } 238 239 /* Return a pointer to the PDP slot that corresponds to a VA */ 240 static __inline pdp_entry_t * 241 pmap_pml4e_to_pdpe(pml4_entry_t *pml4e, vm_offset_t va) 242 { 243 pdp_entry_t *pdpe; 244 245 pdpe = (pdp_entry_t *)PHYS_TO_DMAP(*pml4e & VPTE_FRAME); 246 return (&pdpe[pmap_pdpe_index(va)]); 247 } 248 249 /* Return a pointer to the PDP slot that corresponds to a VA */ 250 static __inline pdp_entry_t * 251 pmap_pdpe(pmap_t pmap, vm_offset_t va) 252 { 
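        /*
         * Quick illustration of the pmap_*_index() helpers above (values
         * assume the standard x86_64 4-level layout: 512 entries per level,
         * PAGE_SHIFT 12, PDRSHIFT 21, PDPSHIFT 30, PML4SHIFT 39).  For
         * va = 0x0000000200403000:
         *
         *      pmap_pml4e_index(va) = (va >> 39) & 511 = 0
         *      pmap_pdpe_index(va)  = (va >> 30) & 511 = 8
         *      pmap_pde_index(va)   = (va >> 21) & 511 = 2
         *      pmap_pte_index(va)   = (va >> 12) & 511 = 3
         *
         * pmap_pdpe() below combines pmap_pml4e() with these indices to
         * descend one level of the page table tree.
         */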
253 pml4_entry_t *pml4e; 254 255 pml4e = pmap_pml4e(pmap, va); 256 if ((*pml4e & VPTE_V) == 0) 257 return NULL; 258 return (pmap_pml4e_to_pdpe(pml4e, va)); 259 } 260 261 /* Return a pointer to the PD slot that corresponds to a VA */ 262 static __inline pd_entry_t * 263 pmap_pdpe_to_pde(pdp_entry_t *pdpe, vm_offset_t va) 264 { 265 pd_entry_t *pde; 266 267 pde = (pd_entry_t *)PHYS_TO_DMAP(*pdpe & VPTE_FRAME); 268 return (&pde[pmap_pde_index(va)]); 269 } 270 271 /* Return a pointer to the PD slot that corresponds to a VA */ 272 static __inline pd_entry_t * 273 pmap_pde(pmap_t pmap, vm_offset_t va) 274 { 275 pdp_entry_t *pdpe; 276 277 pdpe = pmap_pdpe(pmap, va); 278 if (pdpe == NULL || (*pdpe & VPTE_V) == 0) 279 return NULL; 280 return (pmap_pdpe_to_pde(pdpe, va)); 281 } 282 283 /* Return a pointer to the PT slot that corresponds to a VA */ 284 static __inline pt_entry_t * 285 pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va) 286 { 287 pt_entry_t *pte; 288 289 pte = (pt_entry_t *)PHYS_TO_DMAP(*pde & VPTE_FRAME); 290 return (&pte[pmap_pte_index(va)]); 291 } 292 293 /* 294 * Hold pt_m for page table scans to prevent it from getting reused out 295 * from under us across blocking conditions in the body of the loop. 296 */ 297 static __inline 298 vm_page_t 299 pmap_hold_pt_page(pd_entry_t *pde, vm_offset_t va) 300 { 301 pt_entry_t pte; 302 vm_page_t pt_m; 303 304 pte = (pt_entry_t)*pde; 305 KKASSERT(pte != 0); 306 pt_m = PHYS_TO_VM_PAGE(pte & VPTE_FRAME); 307 vm_page_hold(pt_m); 308 309 return pt_m; 310 } 311 312 /* Return a pointer to the PT slot that corresponds to a VA */ 313 static __inline pt_entry_t * 314 pmap_pte(pmap_t pmap, vm_offset_t va) 315 { 316 pd_entry_t *pde; 317 318 pde = pmap_pde(pmap, va); 319 if (pde == NULL || (*pde & VPTE_V) == 0) 320 return NULL; 321 if ((*pde & VPTE_PS) != 0) /* compat with i386 pmap_pte() */ 322 return ((pt_entry_t *)pde); 323 return (pmap_pde_to_pte(pde, va)); 324 } 325 326 static PMAP_INLINE pt_entry_t * 327 vtopte(vm_offset_t va) 328 { 329 pt_entry_t *x; 330 x = pmap_pte(&kernel_pmap, va); 331 assert(x != NULL); 332 return x; 333 } 334 335 static __inline pd_entry_t * 336 vtopde(vm_offset_t va) 337 { 338 pd_entry_t *x; 339 x = pmap_pde(&kernel_pmap, va); 340 assert(x != NULL); 341 return x; 342 } 343 344 static uint64_t 345 allocpages(vm_paddr_t *firstaddr, int n) 346 { 347 uint64_t ret; 348 349 ret = *firstaddr; 350 /*bzero((void *)ret, n * PAGE_SIZE); not mapped yet */ 351 *firstaddr += n * PAGE_SIZE; 352 return (ret); 353 } 354 355 static void 356 create_dmap_vmm(vm_paddr_t *firstaddr) 357 { 358 void *stack_addr; 359 int pml4_stack_index; 360 int pdp_stack_index; 361 int pd_stack_index; 362 long i,j; 363 int regs[4]; 364 int amd_feature; 365 366 uint64_t KPDP_DMAP_phys = allocpages(firstaddr, NDMPML4E); 367 uint64_t KPDP_VSTACK_phys = allocpages(firstaddr, 1); 368 uint64_t KPD_VSTACK_phys = allocpages(firstaddr, 1); 369 370 pml4_entry_t *KPML4virt = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); 371 pdp_entry_t *KPDP_DMAP_virt = (pdp_entry_t *)PHYS_TO_DMAP(KPDP_DMAP_phys); 372 pdp_entry_t *KPDP_VSTACK_virt = (pdp_entry_t *)PHYS_TO_DMAP(KPDP_VSTACK_phys); 373 pd_entry_t *KPD_VSTACK_virt = (pd_entry_t *)PHYS_TO_DMAP(KPD_VSTACK_phys); 374 375 bzero(KPDP_DMAP_virt, NDMPML4E * PAGE_SIZE); 376 bzero(KPDP_VSTACK_virt, 1 * PAGE_SIZE); 377 bzero(KPD_VSTACK_virt, 1 * PAGE_SIZE); 378 379 do_cpuid(0x80000001, regs); 380 amd_feature = regs[3]; 381 382 /* Build the mappings for the first 512GB */ 383 if (amd_feature & AMDID_PAGE1GB) { 384 /* In pages of 1 GB, if supported 
 */
                for (i = 0; i < NPDPEPG; i++) {
                        KPDP_DMAP_virt[i] = ((uint64_t)i << PDPSHIFT);
                        KPDP_DMAP_virt[i] |= VPTE_RW | VPTE_V | VPTE_PS | VPTE_U;
                }
        } else {
                /* In pages of 2 MB, otherwise */
                for (i = 0; i < NPDPEPG; i++) {
                        uint64_t KPD_DMAP_phys;
                        pd_entry_t *KPD_DMAP_virt;

                        KPD_DMAP_phys = allocpages(firstaddr, 1);
                        KPD_DMAP_virt =
                                (pd_entry_t *)PHYS_TO_DMAP(KPD_DMAP_phys);

                        bzero(KPD_DMAP_virt, PAGE_SIZE);

                        KPDP_DMAP_virt[i] = KPD_DMAP_phys;
                        KPDP_DMAP_virt[i] |= VPTE_RW | VPTE_V | VPTE_U;

                        /* For each PD, we have to allocate NPTEPG PT */
                        for (j = 0; j < NPTEPG; j++) {
                                KPD_DMAP_virt[j] = (i << PDPSHIFT) |
                                                   (j << PDRSHIFT);
                                KPD_DMAP_virt[j] |= VPTE_RW | VPTE_V |
                                                    VPTE_PS | VPTE_U;
                        }
                }
        }

        /* DMAP for the first 512G */
        KPML4virt[0] = KPDP_DMAP_phys;
        KPML4virt[0] |= VPTE_RW | VPTE_V | VPTE_U;

        /* create a 2 MB map of the new stack */
        pml4_stack_index = (uint64_t)&stack_addr >> PML4SHIFT;
        KPML4virt[pml4_stack_index] = KPDP_VSTACK_phys;
        KPML4virt[pml4_stack_index] |= VPTE_RW | VPTE_V | VPTE_U;

        pdp_stack_index = ((uint64_t)&stack_addr & PML4MASK) >> PDPSHIFT;
        KPDP_VSTACK_virt[pdp_stack_index] = KPD_VSTACK_phys;
        KPDP_VSTACK_virt[pdp_stack_index] |= VPTE_RW | VPTE_V | VPTE_U;

        pd_stack_index = ((uint64_t)&stack_addr & PDPMASK) >> PDRSHIFT;
        KPD_VSTACK_virt[pd_stack_index] = (uint64_t)vkernel_stack;
        KPD_VSTACK_virt[pd_stack_index] |= VPTE_RW | VPTE_V | VPTE_U | VPTE_PS;
}

static void
create_pagetables(vm_paddr_t *firstaddr, int64_t ptov_offset)
{
        int i;
        pml4_entry_t *KPML4virt;
        pdp_entry_t *KPDPvirt;
        pd_entry_t *KPDvirt;
        pt_entry_t *KPTvirt;
        int kpml4i = pmap_pml4e_index(ptov_offset);
        int kpdpi = pmap_pdpe_index(ptov_offset);
        int kpdi = pmap_pde_index(ptov_offset);

        /*
         * Calculate NKPT - the number of kernel page tables.  We have to
         * accommodate preallocation of the vm_page_array, the dump bitmap,
         * MSGBUF_SIZE, and other stuff.  Be generous.
         *
         * Maxmem is in pages.
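         *
         * Worked example (numbers are illustrative only): with 1 GiB of
         * guest memory Maxmem is 262144 pages; assuming roughly 128 bytes
         * per struct vm_page, Maxmem * sizeof(struct vm_page) * 2 comes to
         * about 64 MiB.  Divided by NBPDR (2 MiB covered per page table)
         * that yields an nkpt of roughly 32, i.e. enough page tables to map
         * about 64 MiB of initial kernel virtual memory.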
450 */ 451 nkpt = (Maxmem * (sizeof(struct vm_page) * 2) + MSGBUF_SIZE) / NBPDR; 452 /* 453 * Allocate pages 454 */ 455 KPML4phys = allocpages(firstaddr, 1); 456 KPDPphys = allocpages(firstaddr, NKPML4E); 457 KPDphys = allocpages(firstaddr, NKPDPE); 458 KPTphys = allocpages(firstaddr, nkpt); 459 460 KPML4virt = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); 461 KPDPvirt = (pdp_entry_t *)PHYS_TO_DMAP(KPDPphys); 462 KPDvirt = (pd_entry_t *)PHYS_TO_DMAP(KPDphys); 463 KPTvirt = (pt_entry_t *)PHYS_TO_DMAP(KPTphys); 464 465 bzero(KPML4virt, 1 * PAGE_SIZE); 466 bzero(KPDPvirt, NKPML4E * PAGE_SIZE); 467 bzero(KPDvirt, NKPDPE * PAGE_SIZE); 468 bzero(KPTvirt, nkpt * PAGE_SIZE); 469 470 /* Now map the page tables at their location within PTmap */ 471 for (i = 0; i < nkpt; i++) { 472 KPDvirt[i + kpdi] = KPTphys + (i << PAGE_SHIFT); 473 KPDvirt[i + kpdi] |= VPTE_RW | VPTE_V | VPTE_U; 474 } 475 476 /* And connect up the PD to the PDP */ 477 for (i = 0; i < NKPDPE; i++) { 478 KPDPvirt[i + kpdpi] = KPDphys + (i << PAGE_SHIFT); 479 KPDPvirt[i + kpdpi] |= VPTE_RW | VPTE_V | VPTE_U; 480 } 481 482 /* And recursively map PML4 to itself in order to get PTmap */ 483 KPML4virt[PML4PML4I] = KPML4phys; 484 KPML4virt[PML4PML4I] |= VPTE_RW | VPTE_V | VPTE_U; 485 486 /* Connect the KVA slot up to the PML4 */ 487 KPML4virt[kpml4i] = KPDPphys; 488 KPML4virt[kpml4i] |= VPTE_RW | VPTE_V | VPTE_U; 489 } 490 491 /* 492 * Typically used to initialize a fictitious page by vm/device_pager.c 493 */ 494 void 495 pmap_page_init(struct vm_page *m) 496 { 497 vm_page_init(m); 498 TAILQ_INIT(&m->md.pv_list); 499 } 500 501 /* 502 * Bootstrap the system enough to run with virtual memory. 503 * 504 * On the i386 this is called after mapping has already been enabled 505 * and just syncs the pmap module with what has already been done. 506 * [We can't call it easily with mapping off since the kernel is not 507 * mapped with PA == VA, hence we would have to relocate every address 508 * from the linked base (virtual) address "KERNBASE" to the actual 509 * (physical) address starting relative to 0] 510 */ 511 void 512 pmap_bootstrap(vm_paddr_t *firstaddr, int64_t ptov_offset) 513 { 514 vm_offset_t va; 515 pt_entry_t *pte; 516 517 /* 518 * Create an initial set of page tables to run the kernel in. 519 */ 520 create_pagetables(firstaddr, ptov_offset); 521 522 /* Create the DMAP for the VMM */ 523 if (vmm_enabled) { 524 create_dmap_vmm(firstaddr); 525 } 526 527 virtual_start = KvaStart; 528 virtual_end = KvaEnd; 529 530 /* 531 * Initialize protection array. 532 */ 533 i386_protection_init(); 534 535 /* 536 * The kernel's pmap is statically allocated so we don't have to use 537 * pmap_create, which is unlikely to work correctly at this part of 538 * the boot sequence (XXX and which no longer exists). 539 * 540 * The kernel_pmap's pm_pteobj is used only for locking and not 541 * for mmu pages. 542 */ 543 kernel_pmap.pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); 544 kernel_pmap.pm_count = 1; 545 /* don't allow deactivation */ 546 CPUMASK_ASSALLONES(kernel_pmap.pm_active); 547 kernel_pmap.pm_pteobj = NULL; /* see pmap_init */ 548 RB_INIT(&kernel_pmap.pm_pvroot); 549 spin_init(&kernel_pmap.pm_spin, "pmapbootstrap"); 550 551 /* 552 * Reserve some special page table entries/VA space for temporary 553 * mapping of pages. 554 */ 555 #define SYSMAP(c, p, v, n) \ 556 v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); 557 558 va = virtual_start; 559 pte = pmap_pte(&kernel_pmap, va); 560 /* 561 * CMAP1/CMAP2 are used for zeroing and copying pages. 
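         *
         * The SYSMAP() macro above simply carves the next (n) pages out of
         * the scratch 'va'/'pte' cursors.  As a sketch, the first invocation
         * below expands to roughly the following (illustrative only):
         */
#if 0
        CADDR1 = (caddr_t)va;           /* v = (c)va            */
        va += 1 * PAGE_SIZE;            /* va += n * PAGE_SIZE  */
        CMAP1 = pte;                    /* p = pte              */
        pte += 1;                       /* pte += n             */
#endif
        /*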
562 */ 563 SYSMAP(caddr_t, CMAP1, CADDR1, 1) 564 565 #if JGV 566 /* 567 * Crashdump maps. 568 */ 569 SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS); 570 #endif 571 572 /* 573 * ptvmmap is used for reading arbitrary physical pages via 574 * /dev/mem. 575 */ 576 SYSMAP(caddr_t, ptmmap, ptvmmap, 1) 577 578 /* 579 * msgbufp is used to map the system message buffer. 580 * XXX msgbufmap is not used. 581 */ 582 SYSMAP(struct msgbuf *, msgbufmap, msgbufp, 583 atop(round_page(MSGBUF_SIZE))) 584 585 virtual_start = va; 586 587 *CMAP1 = 0; 588 /* Not ready to do an invltlb yet for VMM*/ 589 if (!vmm_enabled) 590 cpu_invltlb(); 591 592 } 593 594 /* 595 * Initialize the pmap module. 596 * Called by vm_init, to initialize any structures that the pmap 597 * system needs to map virtual memory. 598 * pmap_init has been enhanced to support in a fairly consistant 599 * way, discontiguous physical memory. 600 */ 601 void 602 pmap_init(void) 603 { 604 int i; 605 int initial_pvs; 606 607 /* 608 * object for kernel page table pages 609 */ 610 /* JG I think the number can be arbitrary */ 611 vm_object_init(&kptobj, 5); 612 kernel_pmap.pm_pteobj = &kptobj; 613 614 /* 615 * Allocate memory for random pmap data structures. Includes the 616 * pv_head_table. 617 */ 618 for(i = 0; i < vm_page_array_size; i++) { 619 vm_page_t m; 620 621 m = &vm_page_array[i]; 622 TAILQ_INIT(&m->md.pv_list); 623 m->md.pv_list_count = 0; 624 } 625 626 /* 627 * init the pv free list 628 */ 629 initial_pvs = vm_page_array_size; 630 if (initial_pvs < MINPV) 631 initial_pvs = MINPV; 632 pvzone = &pvzone_store; 633 pvinit = (struct pv_entry *) 634 kmem_alloc(&kernel_map, 635 initial_pvs * sizeof (struct pv_entry), 636 VM_SUBSYS_PVENTRY); 637 zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit, 638 initial_pvs); 639 640 /* 641 * Now it is safe to enable pv_table recording. 642 */ 643 pmap_initialized = TRUE; 644 } 645 646 /* 647 * Initialize the address space (zone) for the pv_entries. Set a 648 * high water mark so that the system can recover from excessive 649 * numbers of pv entries. 650 */ 651 void 652 pmap_init2(void) 653 { 654 int shpgperproc = PMAP_SHPGPERPROC; 655 656 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); 657 pv_entry_max = shpgperproc * maxproc + vm_page_array_size; 658 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); 659 pv_entry_high_water = 9 * (pv_entry_max / 10); 660 zinitna(pvzone, NULL, 0, pv_entry_max, ZONE_INTERRUPT); 661 } 662 663 664 /*************************************************** 665 * Low level helper routines..... 666 ***************************************************/ 667 668 /* 669 * The modification bit is not tracked for any pages in this range. XXX 670 * such pages in this maps should always use pmap_k*() functions and not 671 * be managed anyhow. 672 * 673 * XXX User and kernel address spaces are independant for virtual kernels, 674 * this function only applies to the kernel pmap. 675 */ 676 int 677 pmap_track_modified(pmap_t pmap, vm_offset_t va) 678 { 679 if (pmap != &kernel_pmap) 680 return 1; 681 if ((va < clean_sva) || (va >= clean_eva)) 682 return 1; 683 else 684 return 0; 685 } 686 687 /* 688 * Extract the physical page address associated with the map/VA pair. 689 * 690 * No requirements. 
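 *
 * Minimal usage sketch (illustrative only; 'upmap' and 'uva' are hypothetical
 * caller variables).  The handle is currently always returned as NULL and
 * pmap_extract_done() accepts NULL, so the pairing below is always safe:
 */
#if 0
        void *handle;
        vm_paddr_t pa;

        pa = pmap_extract(upmap, uva, &handle);
        if (pa != 0) {
                /* ... use the physical address ... */
        }
        pmap_extract_done(handle);
#endif
/*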
691 */ 692 vm_paddr_t 693 pmap_extract(pmap_t pmap, vm_offset_t va, void **handlep) 694 { 695 vm_paddr_t rtval; 696 pt_entry_t *pte; 697 pd_entry_t pde, *pdep; 698 699 vm_object_hold(pmap->pm_pteobj); 700 rtval = 0; 701 pdep = pmap_pde(pmap, va); 702 if (pdep != NULL) { 703 pde = *pdep; 704 if (pde) { 705 if ((pde & VPTE_PS) != 0) { 706 /* JGV */ 707 rtval = (pde & PG_PS_FRAME) | (va & PDRMASK); 708 } else { 709 pte = pmap_pde_to_pte(pdep, va); 710 rtval = (*pte & VPTE_FRAME) | (va & PAGE_MASK); 711 } 712 } 713 } 714 if (handlep) 715 *handlep = NULL; /* XXX */ 716 vm_object_drop(pmap->pm_pteobj); 717 718 return rtval; 719 } 720 721 void 722 pmap_extract_done(void *handle) 723 { 724 pmap_t pmap; 725 726 if (handle) { 727 pmap = handle; 728 vm_object_drop(pmap->pm_pteobj); 729 } 730 } 731 732 /* 733 * Similar to extract but checks protections, SMP-friendly short-cut for 734 * vm_fault_page[_quick](). 735 * 736 * WARNING! THE RETURNED PAGE IS ONLY HELD AND NEITHER IT NOR ITS TARGET 737 * DATA IS SUITABLE FOR WRITING. Writing can interfere with 738 * pageouts flushes, msync, etc. The hold_count is not enough 739 * to avoid races against pageouts and other flush code doesn't 740 * care about hold_count. 741 */ 742 vm_page_t 743 pmap_fault_page_quick(pmap_t pmap __unused, vm_offset_t vaddr __unused, 744 vm_prot_t prot __unused, int *busyp __unused) 745 { 746 return(NULL); 747 } 748 749 /* 750 * Routine: pmap_kextract 751 * Function: 752 * Extract the physical page address associated 753 * kernel virtual address. 754 */ 755 vm_paddr_t 756 pmap_kextract(vm_offset_t va) 757 { 758 pd_entry_t pde; 759 vm_paddr_t pa; 760 761 KKASSERT(va >= KvaStart && va < KvaEnd); 762 763 /* 764 * The DMAP region is not included in [KvaStart, KvaEnd) 765 */ 766 #if 0 767 if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { 768 pa = DMAP_TO_PHYS(va); 769 } else { 770 #endif 771 pde = *vtopde(va); 772 if (pde & VPTE_PS) { 773 /* JGV */ 774 pa = (pde & PG_PS_FRAME) | (va & PDRMASK); 775 } else { 776 /* 777 * Beware of a concurrent promotion that changes the 778 * PDE at this point! For example, vtopte() must not 779 * be used to access the PTE because it would use the 780 * new PDE. It is, however, safe to use the old PDE 781 * because the page table page is preserved by the 782 * promotion. 783 */ 784 pa = *pmap_pde_to_pte(&pde, va); 785 pa = (pa & VPTE_FRAME) | (va & PAGE_MASK); 786 } 787 #if 0 788 } 789 #endif 790 return pa; 791 } 792 793 /*************************************************** 794 * Low level mapping routines..... 795 ***************************************************/ 796 797 /* 798 * Enter a mapping into kernel_pmap. Mappings created in this fashion 799 * are not managed. Mappings must be immediately accessible on all cpus. 800 * 801 * Call pmap_inval_pte() to invalidate the virtual pte and clean out the 802 * real pmap and handle related races before storing the new vpte. The 803 * new semantics for kenter require use to do an UNCONDITIONAL invalidation, 804 * because the entry may have previously been cleared without an invalidation. 
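 * (For example, a previous pmap_kremove_noinval() will have zeroed the vpte
 * while deliberately deferring the invalidation, so the real kernel may still
 * hold a stale translation even though the vpte reads as zero here.)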
805 */ 806 void 807 pmap_kenter(vm_offset_t va, vm_paddr_t pa) 808 { 809 pt_entry_t *ptep; 810 pt_entry_t npte; 811 812 KKASSERT(va >= KvaStart && va < KvaEnd); 813 npte = pa | VPTE_RW | VPTE_V | VPTE_U; 814 ptep = vtopte(va); 815 816 #if 1 817 pmap_inval_pte(ptep, &kernel_pmap, va); 818 #else 819 if (*pte & VPTE_V) 820 pmap_inval_pte(ptep, &kernel_pmap, va); 821 #endif 822 atomic_swap_long(ptep, npte); 823 } 824 825 /* 826 * Enter an unmanaged KVA mapping for the private use of the current 827 * cpu only. 828 * 829 * It is illegal for the mapping to be accessed by other cpus without 830 * proper invalidation. 831 */ 832 int 833 pmap_kenter_quick(vm_offset_t va, vm_paddr_t pa) 834 { 835 pt_entry_t *ptep; 836 pt_entry_t npte; 837 int res; 838 839 KKASSERT(va >= KvaStart && va < KvaEnd); 840 841 npte = (vpte_t)pa | VPTE_RW | VPTE_V | VPTE_U; 842 ptep = vtopte(va); 843 844 #if 1 845 pmap_inval_pte_quick(ptep, &kernel_pmap, va); 846 res = 1; 847 #else 848 /* FUTURE */ 849 res = (*ptep != 0); 850 if (*pte & VPTE_V) 851 pmap_inval_pte(pte, &kernel_pmap, va); 852 #endif 853 atomic_swap_long(ptep, npte); 854 855 return res; 856 } 857 858 /* 859 * Invalidation will occur later, ok to be lazy here. 860 */ 861 int 862 pmap_kenter_noinval(vm_offset_t va, vm_paddr_t pa) 863 { 864 pt_entry_t *ptep; 865 pt_entry_t npte; 866 int res; 867 868 KKASSERT(va >= KvaStart && va < KvaEnd); 869 870 npte = (vpte_t)pa | VPTE_RW | VPTE_V | VPTE_U; 871 ptep = vtopte(va); 872 #if 1 873 res = 1; 874 #else 875 /* FUTURE */ 876 res = (*ptep != 0); 877 #endif 878 atomic_swap_long(ptep, npte); 879 880 return res; 881 } 882 883 /* 884 * Remove an unmanaged mapping created with pmap_kenter*(). 885 */ 886 void 887 pmap_kremove(vm_offset_t va) 888 { 889 pt_entry_t *ptep; 890 891 KKASSERT(va >= KvaStart && va < KvaEnd); 892 893 ptep = vtopte(va); 894 atomic_swap_long(ptep, 0); 895 pmap_inval_pte(ptep, &kernel_pmap, va); 896 } 897 898 /* 899 * Remove an unmanaged mapping created with pmap_kenter*() but synchronize 900 * only with this cpu. 901 * 902 * Unfortunately because we optimize new entries by testing VPTE_V later 903 * on, we actually still have to synchronize with all the cpus. XXX maybe 904 * store a junk value and test against 0 in the other places instead? 905 */ 906 void 907 pmap_kremove_quick(vm_offset_t va) 908 { 909 pt_entry_t *ptep; 910 911 KKASSERT(va >= KvaStart && va < KvaEnd); 912 913 ptep = vtopte(va); 914 atomic_swap_long(ptep, 0); 915 pmap_inval_pte(ptep, &kernel_pmap, va); /* NOT _quick */ 916 } 917 918 /* 919 * Invalidation will occur later, ok to be lazy here. 920 */ 921 void 922 pmap_kremove_noinval(vm_offset_t va) 923 { 924 pt_entry_t *ptep; 925 926 KKASSERT(va >= KvaStart && va < KvaEnd); 927 928 ptep = vtopte(va); 929 atomic_swap_long(ptep, 0); 930 } 931 932 /* 933 * Used to map a range of physical addresses into kernel 934 * virtual address space. 935 * 936 * For now, VM is already on, we only need to map the 937 * specified memory. 938 */ 939 vm_offset_t 940 pmap_map(vm_offset_t *virtp, vm_paddr_t start, vm_paddr_t end, int prot) 941 { 942 return PHYS_TO_DMAP(start); 943 } 944 945 /* 946 * Map a set of unmanaged VM pages into KVM. 
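 *
 * Minimal usage sketch (illustrative only; 'kva', 'mpages' and 'npages' are
 * hypothetical caller state holding an already-reserved KVA range and the
 * unmanaged pages to map into it):
 */
#if 0
        pmap_qenter(kva, mpages, npages);
        /* ... access the pages through kva ... */
        pmap_qremove(kva, npages);
#endif
/*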
947 */ 948 void 949 pmap_qenter(vm_offset_t beg_va, vm_page_t *m, int count) 950 { 951 vm_offset_t end_va; 952 vm_offset_t va; 953 954 end_va = beg_va + count * PAGE_SIZE; 955 KKASSERT(beg_va >= KvaStart && end_va <= KvaEnd); 956 957 for (va = beg_va; va < end_va; va += PAGE_SIZE) { 958 pt_entry_t *ptep; 959 960 ptep = vtopte(va); 961 atomic_swap_long(ptep, VM_PAGE_TO_PHYS(*m) | 962 VPTE_RW | VPTE_V | VPTE_U); 963 ++m; 964 } 965 pmap_invalidate_range(&kernel_pmap, beg_va, end_va); 966 /* pmap_inval_pte(pte, &kernel_pmap, va); */ 967 } 968 969 /* 970 * Undo the effects of pmap_qenter*(). 971 */ 972 void 973 pmap_qremove(vm_offset_t beg_va, int count) 974 { 975 vm_offset_t end_va; 976 vm_offset_t va; 977 978 end_va = beg_va + count * PAGE_SIZE; 979 KKASSERT(beg_va >= KvaStart && end_va < KvaEnd); 980 981 for (va = beg_va; va < end_va; va += PAGE_SIZE) { 982 pt_entry_t *ptep; 983 984 ptep = vtopte(va); 985 atomic_swap_long(ptep, 0); 986 } 987 pmap_invalidate_range(&kernel_pmap, beg_va, end_va); 988 } 989 990 /* 991 * Unlike the real pmap code, we can't avoid calling the real-kernel. 992 */ 993 void 994 pmap_qremove_quick(vm_offset_t va, int count) 995 { 996 pmap_qremove(va, count); 997 } 998 999 void 1000 pmap_qremove_noinval(vm_offset_t va, int count) 1001 { 1002 pmap_qremove(va, count); 1003 } 1004 1005 /* 1006 * This routine works like vm_page_lookup() but also blocks as long as the 1007 * page is busy. This routine does not busy the page it returns. 1008 * 1009 * Unless the caller is managing objects whos pages are in a known state, 1010 * the call should be made with a critical section held so the page's object 1011 * association remains valid on return. 1012 */ 1013 static vm_page_t 1014 pmap_page_lookup(vm_object_t object, vm_pindex_t pindex) 1015 { 1016 vm_page_t m; 1017 1018 ASSERT_LWKT_TOKEN_HELD(vm_object_token(object)); 1019 m = vm_page_lookup_busy_wait(object, pindex, TRUE, "pplookp"); 1020 1021 return(m); 1022 } 1023 1024 /* 1025 * Create a new thread and optionally associate it with a (new) process. 1026 * NOTE! the new thread's cpu may not equal the current cpu. 1027 */ 1028 void 1029 pmap_init_thread(thread_t td) 1030 { 1031 /* enforce pcb placement */ 1032 td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_size) - 1; 1033 td->td_savefpu = &td->td_pcb->pcb_save; 1034 td->td_sp = (char *)td->td_pcb - 16; /* JG is -16 needed on x86_64? */ 1035 } 1036 1037 /* 1038 * This routine directly affects the fork perf for a process. 1039 */ 1040 void 1041 pmap_init_proc(struct proc *p) 1042 { 1043 } 1044 1045 /* 1046 * Unwire a page table which has been removed from the pmap. We own the 1047 * wire_count, so the page cannot go away. The page representing the page 1048 * table is passed in unbusied and must be busied if we cannot trivially 1049 * unwire it. 1050 * 1051 * XXX NOTE! This code is not usually run because we do not currently 1052 * implement dynamic page table page removal. The page in 1053 * its parent assumes at least 1 wire count, so no call to this 1054 * function ever sees a wire count less than 2. 1055 */ 1056 static int 1057 pmap_unwire_pgtable(pmap_t pmap, vm_offset_t va, vm_page_t m) 1058 { 1059 /* 1060 * Try to unwire optimally. If non-zero is returned the wire_count 1061 * is 1 and we must busy the page to unwire it. 
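         *
         * Note that every page table page holds one wire_count on its
         * parent (a PT page wires its PD page, a PD page wires its PDP
         * page), so dropping the last wire here cascades into the
         * pmap_unwire_pgtable() calls on the parent further down.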
1062 */ 1063 if (vm_page_unwire_quick(m) == 0) 1064 return 0; 1065 1066 vm_page_busy_wait(m, TRUE, "pmuwpt"); 1067 KASSERT(m->queue == PQ_NONE, 1068 ("_pmap_unwire_pgtable: %p->queue != PQ_NONE", m)); 1069 1070 if (m->wire_count == 1) { 1071 /* 1072 * Unmap the page table page. 1073 */ 1074 /* pmap_inval_add(info, pmap, -1); */ 1075 1076 if (m->pindex >= (NUPT_TOTAL + NUPD_TOTAL)) { 1077 /* PDP page */ 1078 pml4_entry_t *pml4; 1079 pml4 = pmap_pml4e(pmap, va); 1080 *pml4 = 0; 1081 } else if (m->pindex >= NUPT_TOTAL) { 1082 /* PD page */ 1083 pdp_entry_t *pdp; 1084 pdp = pmap_pdpe(pmap, va); 1085 *pdp = 0; 1086 } else { 1087 /* PT page */ 1088 pd_entry_t *pd; 1089 pd = pmap_pde(pmap, va); 1090 *pd = 0; 1091 } 1092 1093 KKASSERT(pmap->pm_stats.resident_count > 0); 1094 atomic_add_long(&pmap->pm_stats.resident_count, -1); 1095 1096 if (pmap->pm_ptphint == m) 1097 pmap->pm_ptphint = NULL; 1098 1099 if (m->pindex < NUPT_TOTAL) { 1100 /* We just released a PT, unhold the matching PD */ 1101 vm_page_t pdpg; 1102 1103 pdpg = PHYS_TO_VM_PAGE(*pmap_pdpe(pmap, va) & 1104 VPTE_FRAME); 1105 pmap_unwire_pgtable(pmap, va, pdpg); 1106 } 1107 if (m->pindex >= NUPT_TOTAL && 1108 m->pindex < (NUPT_TOTAL + NUPD_TOTAL)) { 1109 /* We just released a PD, unhold the matching PDP */ 1110 vm_page_t pdppg; 1111 1112 pdppg = PHYS_TO_VM_PAGE(*pmap_pml4e(pmap, va) & 1113 VPTE_FRAME); 1114 pmap_unwire_pgtable(pmap, va, pdppg); 1115 } 1116 1117 /* 1118 * This was our last wire, the page had better be unwired 1119 * after we decrement wire_count. 1120 * 1121 * FUTURE NOTE: shared page directory page could result in 1122 * multiple wire counts. 1123 */ 1124 vm_page_unwire(m, 0); 1125 KKASSERT(m->wire_count == 0); 1126 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 1127 vm_page_flash(m); 1128 vm_page_free(m); 1129 return 1; 1130 } else { 1131 /* XXX SMP race to 1 if not holding vmobj */ 1132 vm_page_unwire(m, 0); 1133 vm_page_wakeup(m); 1134 return 0; 1135 } 1136 } 1137 1138 /* 1139 * After removing a page table entry, this routine is used to 1140 * conditionally free the page, and manage the hold/wire counts. 1141 * 1142 * If not NULL the caller owns a wire_count on mpte, so it can't disappear. 1143 * If NULL the caller owns a wire_count on what would be the mpte, we must 1144 * look it up. 1145 */ 1146 static int 1147 pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) 1148 { 1149 vm_pindex_t ptepindex; 1150 1151 ASSERT_LWKT_TOKEN_HELD(vm_object_token(pmap->pm_pteobj)); 1152 1153 if (mpte == NULL) { 1154 /* 1155 * page table pages in the kernel_pmap are not managed. 1156 */ 1157 if (pmap == &kernel_pmap) 1158 return(0); 1159 ptepindex = pmap_pt_pindex(va); 1160 if (pmap->pm_ptphint && 1161 (pmap->pm_ptphint->pindex == ptepindex)) { 1162 mpte = pmap->pm_ptphint; 1163 } else { 1164 mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex); 1165 pmap->pm_ptphint = mpte; 1166 vm_page_wakeup(mpte); 1167 } 1168 } 1169 return pmap_unwire_pgtable(pmap, va, mpte); 1170 } 1171 1172 /* 1173 * Initialize pmap0/vmspace0 . Since process 0 never enters user mode we 1174 * just dummy it up so it works well enough for fork(). 1175 * 1176 * In DragonFly, process pmaps may only be used to manipulate user address 1177 * space, never kernel address space. 1178 */ 1179 void 1180 pmap_pinit0(struct pmap *pmap) 1181 { 1182 pmap_pinit(pmap); 1183 } 1184 1185 /* 1186 * Initialize a preallocated and zeroed pmap structure, 1187 * such as one in a vmspace structure. 
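 *
 * Rough lifecycle sketch (illustrative only; 'vs' is a hypothetical
 * struct vmspace pointer) as driven by the vmspace code:
 */
#if 0
        pmap_t pmap = vmspace_pmap(vs);

        pmap_pinit(pmap);               /* constructor                  */
        /* ... enter and remove user mappings ... */
        pmap_release(pmap);             /* drop remaining pt pages      */
        pmap_puninit(pmap);             /* vmspace dtor: free pml4 etc. */
#endif
/*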
1188 */ 1189 void 1190 pmap_pinit(struct pmap *pmap) 1191 { 1192 vm_page_t ptdpg; 1193 1194 /* 1195 * No need to allocate page table space yet but we do need a valid 1196 * page directory table. 1197 */ 1198 if (pmap->pm_pml4 == NULL) { 1199 pmap->pm_pml4 = (pml4_entry_t *) 1200 kmem_alloc_pageable(&kernel_map, PAGE_SIZE, 1201 VM_SUBSYS_PML4); 1202 } 1203 1204 /* 1205 * Allocate an object for the ptes 1206 */ 1207 if (pmap->pm_pteobj == NULL) 1208 pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, NUPT_TOTAL + NUPD_TOTAL + NUPDP_TOTAL + 1); 1209 1210 /* 1211 * Allocate the page directory page, unless we already have 1212 * one cached. If we used the cached page the wire_count will 1213 * already be set appropriately. 1214 */ 1215 if ((ptdpg = pmap->pm_pdirm) == NULL) { 1216 ptdpg = vm_page_grab(pmap->pm_pteobj, 1217 NUPT_TOTAL + NUPD_TOTAL + NUPDP_TOTAL, 1218 VM_ALLOC_NORMAL | VM_ALLOC_RETRY | 1219 VM_ALLOC_ZERO); 1220 pmap->pm_pdirm = ptdpg; 1221 vm_page_flag_clear(ptdpg, PG_MAPPED | PG_WRITEABLE); 1222 vm_page_wire(ptdpg); 1223 vm_page_wakeup(ptdpg); 1224 pmap_kenter((vm_offset_t)pmap->pm_pml4, VM_PAGE_TO_PHYS(ptdpg)); 1225 } 1226 pmap->pm_count = 1; 1227 CPUMASK_ASSZERO(pmap->pm_active); 1228 pmap->pm_ptphint = NULL; 1229 RB_INIT(&pmap->pm_pvroot); 1230 spin_init(&pmap->pm_spin, "pmapinit"); 1231 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1232 pmap->pm_stats.resident_count = 1; 1233 pmap->pm_stats.wired_count = 1; 1234 } 1235 1236 /* 1237 * Clean up a pmap structure so it can be physically freed. This routine 1238 * is called by the vmspace dtor function. A great deal of pmap data is 1239 * left passively mapped to improve vmspace management so we have a bit 1240 * of cleanup work to do here. 1241 * 1242 * No requirements. 1243 */ 1244 void 1245 pmap_puninit(pmap_t pmap) 1246 { 1247 vm_page_t p; 1248 1249 KKASSERT(CPUMASK_TESTZERO(pmap->pm_active)); 1250 if ((p = pmap->pm_pdirm) != NULL) { 1251 KKASSERT(pmap->pm_pml4 != NULL); 1252 pmap_kremove((vm_offset_t)pmap->pm_pml4); 1253 vm_page_busy_wait(p, TRUE, "pgpun"); 1254 vm_page_unwire(p, 0); 1255 vm_page_flag_clear(p, PG_MAPPED | PG_WRITEABLE); 1256 vm_page_free(p); 1257 pmap->pm_pdirm = NULL; 1258 atomic_add_long(&pmap->pm_stats.wired_count, -1); 1259 KKASSERT(pmap->pm_stats.wired_count == 0); 1260 } 1261 if (pmap->pm_pml4) { 1262 kmem_free(&kernel_map, (vm_offset_t)pmap->pm_pml4, PAGE_SIZE); 1263 pmap->pm_pml4 = NULL; 1264 } 1265 if (pmap->pm_pteobj) { 1266 vm_object_deallocate(pmap->pm_pteobj); 1267 pmap->pm_pteobj = NULL; 1268 } 1269 } 1270 1271 /* 1272 * This function is now unused (used to add the pmap to the pmap_list) 1273 */ 1274 void 1275 pmap_pinit2(struct pmap *pmap) 1276 { 1277 } 1278 1279 /* 1280 * Attempt to release and free a vm_page in a pmap. Returns 1 on success, 1281 * 0 on failure (if the procedure had to sleep). 1282 * 1283 * When asked to remove the page directory page itself, we actually just 1284 * leave it cached so we do not have to incur the SMP inval overhead of 1285 * removing the kernel mapping. pmap_puninit() will take care of it. 1286 */ 1287 static int 1288 pmap_release_free_page(struct pmap *pmap, vm_page_t p) 1289 { 1290 /* 1291 * This code optimizes the case of freeing non-busy 1292 * page-table pages. Those pages are zero now, and 1293 * might as well be placed directly into the zero queue. 1294 */ 1295 if (vm_page_busy_try(p, TRUE)) { 1296 vm_page_sleep_busy(p, TRUE, "pmaprl"); 1297 return 1; 1298 } 1299 1300 /* 1301 * Remove the page table page from the processes address space. 
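         *
         * The pindex of a page in pm_pteobj encodes its level:
         *      [0, NUPT_TOTAL)                           PT pages
         *      [NUPT_TOTAL, NUPT_TOTAL + NUPD_TOTAL)     PD pages
         *      [NUPT_TOTAL + NUPD_TOTAL,
         *       NUPT_TOTAL + NUPD_TOTAL + NUPDP_TOTAL)   PDP pages
         *      NUPT_TOTAL + NUPD_TOTAL + NUPDP_TOTAL     the PML4 page itself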
1302 */ 1303 if (p->pindex == NUPT_TOTAL + NUPD_TOTAL + NUPDP_TOTAL) { 1304 /* 1305 * We are the pml4 table itself. 1306 */ 1307 /* XXX anything to do here? */ 1308 } else if (p->pindex >= (NUPT_TOTAL + NUPD_TOTAL)) { 1309 /* 1310 * We are a PDP page. 1311 * We look for the PML4 entry that points to us. 1312 */ 1313 vm_page_t m4; 1314 pml4_entry_t *pml4; 1315 int idx; 1316 1317 m4 = vm_page_lookup(pmap->pm_pteobj, 1318 NUPT_TOTAL + NUPD_TOTAL + NUPDP_TOTAL); 1319 KKASSERT(m4 != NULL); 1320 pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m4)); 1321 idx = (p->pindex - (NUPT_TOTAL + NUPD_TOTAL)) % NPML4EPG; 1322 KKASSERT(pml4[idx] != 0); 1323 if (pml4[idx] == 0) 1324 kprintf("pmap_release: Unmapped PML4\n"); 1325 pml4[idx] = 0; 1326 vm_page_unwire_quick(m4); 1327 } else if (p->pindex >= NUPT_TOTAL) { 1328 /* 1329 * We are a PD page. 1330 * We look for the PDP entry that points to us. 1331 */ 1332 vm_page_t m3; 1333 pdp_entry_t *pdp; 1334 int idx; 1335 1336 m3 = vm_page_lookup(pmap->pm_pteobj, 1337 NUPT_TOTAL + NUPD_TOTAL + 1338 (p->pindex - NUPT_TOTAL) / NPDPEPG); 1339 KKASSERT(m3 != NULL); 1340 pdp = (pdp_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m3)); 1341 idx = (p->pindex - NUPT_TOTAL) % NPDPEPG; 1342 KKASSERT(pdp[idx] != 0); 1343 if (pdp[idx] == 0) 1344 kprintf("pmap_release: Unmapped PDP %d\n", idx); 1345 pdp[idx] = 0; 1346 vm_page_unwire_quick(m3); 1347 } else { 1348 /* We are a PT page. 1349 * We look for the PD entry that points to us. 1350 */ 1351 vm_page_t m2; 1352 pd_entry_t *pd; 1353 int idx; 1354 1355 m2 = vm_page_lookup(pmap->pm_pteobj, 1356 NUPT_TOTAL + p->pindex / NPDEPG); 1357 KKASSERT(m2 != NULL); 1358 pd = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m2)); 1359 idx = p->pindex % NPDEPG; 1360 if (pd[idx] == 0) 1361 kprintf("pmap_release: Unmapped PD %d\n", idx); 1362 pd[idx] = 0; 1363 vm_page_unwire_quick(m2); 1364 } 1365 KKASSERT(pmap->pm_stats.resident_count > 0); 1366 atomic_add_long(&pmap->pm_stats.resident_count, -1); 1367 1368 if (p->wire_count > 1) { 1369 panic("pmap_release: freeing held pt page " 1370 "pmap=%p pg=%p dmap=%p pi=%ld {%ld,%ld,%ld}", 1371 pmap, p, (void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(p)), 1372 p->pindex, NUPT_TOTAL, NUPD_TOTAL, NUPDP_TOTAL); 1373 } 1374 1375 if (pmap->pm_ptphint == p) 1376 pmap->pm_ptphint = NULL; 1377 1378 /* 1379 * We leave the top-level page table page cached, wired, and mapped in 1380 * the pmap until the dtor function (pmap_puninit()) gets called. 1381 * However, still clean it up. 1382 */ 1383 if (p->pindex == NUPT_TOTAL + NUPD_TOTAL + NUPDP_TOTAL) { 1384 bzero(pmap->pm_pml4, PAGE_SIZE); 1385 vm_page_wakeup(p); 1386 } else { 1387 vm_page_unwire(p, 0); 1388 vm_page_flag_clear(p, PG_MAPPED | PG_WRITEABLE); 1389 vm_page_free(p); 1390 atomic_add_long(&pmap->pm_stats.wired_count, -1); 1391 } 1392 return 0; 1393 } 1394 1395 /* 1396 * Locate the requested PT, PD, or PDP page table page. 1397 * 1398 * Returns a busied page, caller must vm_page_wakeup() when done. 1399 */ 1400 static vm_page_t 1401 _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex) 1402 { 1403 vm_page_t m; 1404 vm_page_t pm; 1405 vm_pindex_t pindex; 1406 pt_entry_t *ptep; 1407 pt_entry_t data; 1408 1409 /* 1410 * Find or fabricate a new pagetable page. A non-zero wire_count 1411 * indicates that the page has already been mapped into its parent. 
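         *
         * If the page is new we fall through and hook it into its parent
         * below; for a PD or PT page that recurses through _pmap_allocpte()
         * to make sure the parent (PDP or PD) page exists first, and each
         * child installed adds one wire_count to that parent.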
1412 */ 1413 m = vm_page_grab(pmap->pm_pteobj, ptepindex, 1414 VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY); 1415 if (m->wire_count != 0) 1416 return m; 1417 1418 /* 1419 * Map the page table page into its parent, giving it 1 wire count. 1420 */ 1421 vm_page_wire(m); 1422 vm_page_unmanage(m); 1423 atomic_add_long(&pmap->pm_stats.resident_count, 1); 1424 vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); 1425 1426 data = VM_PAGE_TO_PHYS(m) | 1427 VPTE_RW | VPTE_V | VPTE_U | VPTE_A | VPTE_M | VPTE_WIRED; 1428 atomic_add_long(&pmap->pm_stats.wired_count, 1); 1429 1430 if (ptepindex >= (NUPT_TOTAL + NUPD_TOTAL)) { 1431 /* 1432 * Map PDP into the PML4 1433 */ 1434 pindex = ptepindex - (NUPT_TOTAL + NUPD_TOTAL); 1435 pindex &= (NUPDP_TOTAL - 1); 1436 ptep = (pt_entry_t *)pmap->pm_pml4; 1437 pm = NULL; 1438 } else if (ptepindex >= NUPT_TOTAL) { 1439 /* 1440 * Map PD into its PDP 1441 */ 1442 pindex = (ptepindex - NUPT_TOTAL) >> NPDPEPGSHIFT; 1443 pindex += NUPT_TOTAL + NUPD_TOTAL; 1444 pm = _pmap_allocpte(pmap, pindex); 1445 pindex = (ptepindex - NUPT_TOTAL) & (NPDPEPG - 1); 1446 ptep = (void *)PHYS_TO_DMAP(pm->phys_addr); 1447 } else { 1448 /* 1449 * Map PT into its PD 1450 */ 1451 pindex = ptepindex >> NPDPEPGSHIFT; 1452 pindex += NUPT_TOTAL; 1453 pm = _pmap_allocpte(pmap, pindex); 1454 pindex = ptepindex & (NPTEPG - 1); 1455 ptep = (void *)PHYS_TO_DMAP(pm->phys_addr); 1456 } 1457 1458 /* 1459 * Install the pte in (pm). (m) prevents races. 1460 */ 1461 ptep += pindex; 1462 data = atomic_swap_long(ptep, data); 1463 if (pm) { 1464 vm_page_wire_quick(pm); 1465 vm_page_wakeup(pm); 1466 } 1467 pmap->pm_ptphint = pm; 1468 1469 return m; 1470 } 1471 1472 /* 1473 * Determine the page table page required to access the VA in the pmap 1474 * and allocate it if necessary. Return a held vm_page_t for the page. 1475 * 1476 * Only used with user pmaps. 1477 */ 1478 static vm_page_t 1479 pmap_allocpte(pmap_t pmap, vm_offset_t va) 1480 { 1481 vm_pindex_t ptepindex; 1482 vm_page_t m; 1483 1484 ASSERT_LWKT_TOKEN_HELD(vm_object_token(pmap->pm_pteobj)); 1485 1486 /* 1487 * Calculate pagetable page index, and return the PT page to 1488 * the caller. 1489 */ 1490 ptepindex = pmap_pt_pindex(va); 1491 m = _pmap_allocpte(pmap, ptepindex); 1492 1493 return m; 1494 } 1495 1496 /*************************************************** 1497 * Pmap allocation/deallocation routines. 1498 ***************************************************/ 1499 1500 /* 1501 * Release any resources held by the given physical map. 1502 * Called when a pmap initialized by pmap_pinit is being released. 1503 * Should only be called if the map contains no valid mappings. 1504 */ 1505 static int pmap_release_callback(struct vm_page *p, void *data); 1506 1507 void 1508 pmap_release(struct pmap *pmap) 1509 { 1510 vm_object_t object = pmap->pm_pteobj; 1511 struct rb_vm_page_scan_info info; 1512 1513 KKASSERT(pmap != &kernel_pmap); 1514 1515 #if defined(DIAGNOSTIC) 1516 if (object->ref_count != 1) 1517 panic("pmap_release: pteobj reference count != 1"); 1518 #endif 1519 1520 info.pmap = pmap; 1521 info.object = object; 1522 1523 KASSERT(CPUMASK_TESTZERO(pmap->pm_active), 1524 ("pmap %p still active! 
%016jx", 1525 pmap, 1526 (uintmax_t)CPUMASK_LOWMASK(pmap->pm_active))); 1527 1528 vm_object_hold(object); 1529 do { 1530 info.error = 0; 1531 info.mpte = NULL; 1532 info.limit = object->generation; 1533 1534 vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL, 1535 pmap_release_callback, &info); 1536 if (info.error == 0 && info.mpte) { 1537 if (pmap_release_free_page(pmap, info.mpte)) 1538 info.error = 1; 1539 } 1540 } while (info.error); 1541 1542 pmap->pm_ptphint = NULL; 1543 1544 KASSERT((pmap->pm_stats.wired_count == (pmap->pm_pdirm != NULL)), 1545 ("pmap_release: dangling count %p %ld", 1546 pmap, pmap->pm_stats.wired_count)); 1547 1548 vm_object_drop(object); 1549 } 1550 1551 static int 1552 pmap_release_callback(struct vm_page *p, void *data) 1553 { 1554 struct rb_vm_page_scan_info *info = data; 1555 1556 if (p->pindex == NUPT_TOTAL + NUPD_TOTAL + NUPDP_TOTAL) { 1557 info->mpte = p; 1558 return(0); 1559 } 1560 if (pmap_release_free_page(info->pmap, p)) { 1561 info->error = 1; 1562 return(-1); 1563 } 1564 if (info->object->generation != info->limit) { 1565 info->error = 1; 1566 return(-1); 1567 } 1568 return(0); 1569 } 1570 1571 /* 1572 * Grow the number of kernel page table entries, if needed. 1573 * 1574 * kernel_map must be locked exclusively by the caller. 1575 */ 1576 void 1577 pmap_growkernel(vm_offset_t kstart, vm_offset_t kend) 1578 { 1579 vm_offset_t addr; 1580 vm_paddr_t paddr; 1581 vm_offset_t ptppaddr; 1582 vm_page_t nkpg; 1583 pd_entry_t *pde, newpdir; 1584 pdp_entry_t newpdp; 1585 1586 addr = kend; 1587 1588 vm_object_hold(&kptobj); 1589 if (kernel_vm_end == 0) { 1590 kernel_vm_end = KvaStart; 1591 nkpt = 0; 1592 while ((*pmap_pde(&kernel_pmap, kernel_vm_end) & VPTE_V) != 0) { 1593 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1594 nkpt++; 1595 if (kernel_vm_end - 1 >= kernel_map.max_offset) { 1596 kernel_vm_end = kernel_map.max_offset; 1597 break; 1598 } 1599 } 1600 } 1601 addr = roundup2(addr, PAGE_SIZE * NPTEPG); 1602 if (addr - 1 >= kernel_map.max_offset) 1603 addr = kernel_map.max_offset; 1604 while (kernel_vm_end < addr) { 1605 pde = pmap_pde(&kernel_pmap, kernel_vm_end); 1606 if (pde == NULL) { 1607 /* We need a new PDP entry */ 1608 nkpg = vm_page_alloc(&kptobj, nkpt, 1609 VM_ALLOC_NORMAL | 1610 VM_ALLOC_SYSTEM | 1611 VM_ALLOC_INTERRUPT); 1612 if (nkpg == NULL) { 1613 panic("pmap_growkernel: no memory to " 1614 "grow kernel"); 1615 } 1616 paddr = VM_PAGE_TO_PHYS(nkpg); 1617 pmap_zero_page(paddr); 1618 newpdp = (pdp_entry_t)(paddr | 1619 VPTE_V | VPTE_RW | VPTE_U | 1620 VPTE_A | VPTE_M | VPTE_WIRED); 1621 *pmap_pdpe(&kernel_pmap, kernel_vm_end) = newpdp; 1622 atomic_add_long(&kernel_pmap.pm_stats.wired_count, 1); 1623 nkpt++; 1624 continue; /* try again */ 1625 } 1626 if ((*pde & VPTE_V) != 0) { 1627 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & 1628 ~(PAGE_SIZE * NPTEPG - 1); 1629 if (kernel_vm_end - 1 >= kernel_map.max_offset) { 1630 kernel_vm_end = kernel_map.max_offset; 1631 break; 1632 } 1633 continue; 1634 } 1635 1636 /* 1637 * This index is bogus, but out of the way 1638 */ 1639 nkpg = vm_page_alloc(&kptobj, nkpt, 1640 VM_ALLOC_NORMAL | 1641 VM_ALLOC_SYSTEM | 1642 VM_ALLOC_INTERRUPT); 1643 if (nkpg == NULL) 1644 panic("pmap_growkernel: no memory to grow kernel"); 1645 1646 vm_page_wire(nkpg); 1647 ptppaddr = VM_PAGE_TO_PHYS(nkpg); 1648 pmap_zero_page(ptppaddr); 1649 newpdir = (pd_entry_t)(ptppaddr | 1650 VPTE_V | VPTE_RW | VPTE_U | 1651 VPTE_A | VPTE_M | VPTE_WIRED); 1652 *pmap_pde(&kernel_pmap, kernel_vm_end) = 
                        newpdir;
                atomic_add_long(&kernel_pmap.pm_stats.wired_count, 1);
                nkpt++;

                kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
                                ~(PAGE_SIZE * NPTEPG - 1);
                if (kernel_vm_end - 1 >= kernel_map.max_offset) {
                        kernel_vm_end = kernel_map.max_offset;
                        break;
                }
        }
        vm_object_drop(&kptobj);
}

/*
 * Add a reference to the specified pmap.
 *
 * No requirements.
 */
void
pmap_reference(pmap_t pmap)
{
        if (pmap)
                atomic_add_int(&pmap->pm_count, 1);
}

/************************************************************************
 *                         VMSPACE MANAGEMENT                           *
 ************************************************************************
 *
 * The VMSPACE management we do in our virtual kernel must be reflected
 * in the real kernel.  This is accomplished by making vmspace system
 * calls to the real kernel.
 */
void
cpu_vmspace_alloc(struct vmspace *vm)
{
        int r;
        void *rp;
        vpte_t vpte;

        /*
         * If the VMM is enabled there is nothing to do here, we are
         * able to use real page tables.
         */
        if (vmm_enabled)
                return;

#define USER_SIZE       (VM_MAX_USER_ADDRESS - VM_MIN_USER_ADDRESS)

        if (vmspace_create(&vm->vm_pmap, 0, NULL) < 0)
                panic("vmspace_create() failed");

        rp = vmspace_mmap(&vm->vm_pmap, VM_MIN_USER_ADDRESS, USER_SIZE,
                          PROT_READ|PROT_WRITE|PROT_EXEC,
                          MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED,
                          MemImageFd, 0);
        if (rp == MAP_FAILED)
                panic("vmspace_mmap: failed");
        vmspace_mcontrol(&vm->vm_pmap, VM_MIN_USER_ADDRESS, USER_SIZE,
                         MADV_NOSYNC, 0);
        vpte = VM_PAGE_TO_PHYS(vmspace_pmap(vm)->pm_pdirm) |
               VPTE_RW | VPTE_V | VPTE_U;
        r = vmspace_mcontrol(&vm->vm_pmap, VM_MIN_USER_ADDRESS, USER_SIZE,
                             MADV_SETMAP, vpte);
        if (r < 0)
                panic("vmspace_mcontrol: failed");
}

void
cpu_vmspace_free(struct vmspace *vm)
{
        /*
         * If the VMM is enabled there is nothing to do here, we are
         * able to use real page tables.
         */
        if (vmm_enabled)
                return;

        if (vmspace_destroy(&vm->vm_pmap) < 0)
                panic("vmspace_destroy() failed");
}

/***************************************************
 * page management routines.
 ***************************************************/

/*
 * free the pv_entry back to the free list.  This function may be
 * called from an interrupt.
 */
static __inline void
free_pv_entry(pv_entry_t pv)
{
        atomic_add_int(&pv_entry_count, -1);
        KKASSERT(pv_entry_count >= 0);
        zfree(pvzone, pv);
}

/*
 * get a new pv_entry, allocating a block from the system
 * when needed.  This function may be called from an interrupt.
 */
static pv_entry_t
get_pv_entry(void)
{
        atomic_add_int(&pv_entry_count, 1);
        if (pv_entry_high_water &&
            (pv_entry_count > pv_entry_high_water) &&
            atomic_swap_int(&pmap_pagedaemon_waken, 1) == 0) {
                wakeup(&vm_pages_needed);
        }
        return zalloc(pvzone);
}

/*
 * This routine is very drastic, but can save the system
 * in a pinch.
 *
 * No requirements.
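 *
 * It is normally triggered from the pagedaemon after get_pv_entry() has
 * pushed pv_entry_count past pv_entry_high_water and set
 * pmap_pagedaemon_waken; pmap_collect() then tries to free pv entries by
 * calling pmap_remove_all() on pages that are neither wired nor held.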
1772 */ 1773 void 1774 pmap_collect(void) 1775 { 1776 int i; 1777 vm_page_t m; 1778 static int warningdone=0; 1779 1780 if (pmap_pagedaemon_waken == 0) 1781 return; 1782 pmap_pagedaemon_waken = 0; 1783 1784 if (warningdone < 5) { 1785 kprintf("pmap_collect: collecting pv entries -- " 1786 "suggest increasing PMAP_SHPGPERPROC\n"); 1787 warningdone++; 1788 } 1789 1790 for (i = 0; i < vm_page_array_size; i++) { 1791 m = &vm_page_array[i]; 1792 if (m->wire_count || m->hold_count) 1793 continue; 1794 if (vm_page_busy_try(m, TRUE) == 0) { 1795 if (m->wire_count == 0 && m->hold_count == 0) { 1796 pmap_remove_all(m); 1797 } 1798 vm_page_wakeup(m); 1799 } 1800 } 1801 } 1802 1803 1804 /* 1805 * If it is the first entry on the list, it is actually 1806 * in the header and we must copy the following entry up 1807 * to the header. Otherwise we must search the list for 1808 * the entry. In either case we free the now unused entry. 1809 * 1810 * pmap->pm_pteobj must be held and (m) must be spin-locked by the caller. 1811 */ 1812 static int 1813 pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va) 1814 { 1815 pv_entry_t pv; 1816 int rtval; 1817 1818 vm_page_spin_lock(m); 1819 pv = pv_entry_rb_tree_RB_LOOKUP(&pmap->pm_pvroot, va); 1820 1821 /* 1822 * Note that pv_ptem is NULL if the page table page itself is not 1823 * managed, even if the page being removed IS managed. 1824 */ 1825 rtval = 0; 1826 if (pv) { 1827 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1828 if (TAILQ_EMPTY(&m->md.pv_list)) 1829 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 1830 m->md.pv_list_count--; 1831 KKASSERT(m->md.pv_list_count >= 0); 1832 pv_entry_rb_tree_RB_REMOVE(&pmap->pm_pvroot, pv); 1833 atomic_add_int(&pmap->pm_generation, 1); 1834 vm_page_spin_unlock(m); 1835 rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); 1836 free_pv_entry(pv); 1837 } else { 1838 vm_page_spin_unlock(m); 1839 kprintf("pmap_remove_entry: could not find " 1840 "pmap=%p m=%p va=%016jx\n", 1841 pmap, m, va); 1842 } 1843 return rtval; 1844 } 1845 1846 /* 1847 * Create a pv entry for page at pa for (pmap, va). If the page table page 1848 * holding the VA is managed, mpte will be non-NULL. 1849 * 1850 * pmap->pm_pteobj must be held and (m) must be spin-locked by the caller. 1851 */ 1852 static void 1853 pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m, 1854 pv_entry_t pv) 1855 { 1856 pv->pv_va = va; 1857 pv->pv_pmap = pmap; 1858 pv->pv_ptem = mpte; 1859 1860 m->md.pv_list_count++; 1861 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 1862 pv = pv_entry_rb_tree_RB_INSERT(&pmap->pm_pvroot, pv); 1863 vm_page_flag_set(m, PG_MAPPED); 1864 KKASSERT(pv == NULL); 1865 } 1866 1867 /* 1868 * pmap_remove_pte: do the things to unmap a page in a process 1869 * 1870 * Caller holds pmap->pm_pteobj and holds the associated page table 1871 * page busy to prevent races. 1872 */ 1873 static int 1874 pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, pt_entry_t oldpte, 1875 vm_offset_t va) 1876 { 1877 vm_page_t m; 1878 int error; 1879 1880 if (ptq) 1881 oldpte = pmap_inval_loadandclear(ptq, pmap, va); 1882 1883 if (oldpte & VPTE_WIRED) 1884 atomic_add_long(&pmap->pm_stats.wired_count, -1); 1885 KKASSERT(pmap->pm_stats.wired_count >= 0); 1886 1887 #if 0 1888 /* 1889 * Machines that don't support invlpg, also don't support 1890 * PG_G. XXX PG_G is disabled for SMP so don't worry about 1891 * the SMP case. 
1892 */ 1893 if (oldpte & PG_G) 1894 cpu_invlpg((void *)va); 1895 #endif 1896 KKASSERT(pmap->pm_stats.resident_count > 0); 1897 atomic_add_long(&pmap->pm_stats.resident_count, -1); 1898 if (oldpte & VPTE_MANAGED) { 1899 m = PHYS_TO_VM_PAGE(oldpte); 1900 1901 /* 1902 * NOTE: pmap_remove_entry() will spin-lock the page 1903 */ 1904 if (oldpte & VPTE_M) { 1905 #if defined(PMAP_DIAGNOSTIC) 1906 if (pmap_nw_modified(oldpte)) { 1907 kprintf("pmap_remove: modified page not " 1908 "writable: va: 0x%lx, pte: 0x%lx\n", 1909 va, oldpte); 1910 } 1911 #endif 1912 if (pmap_track_modified(pmap, va)) 1913 vm_page_dirty(m); 1914 } 1915 if (oldpte & VPTE_A) 1916 vm_page_flag_set(m, PG_REFERENCED); 1917 error = pmap_remove_entry(pmap, m, va); 1918 } else { 1919 error = pmap_unuse_pt(pmap, va, NULL); 1920 } 1921 return error; 1922 } 1923 1924 /* 1925 * pmap_remove_page: 1926 * 1927 * Remove a single page from a process address space. 1928 * 1929 * This function may not be called from an interrupt if the pmap is 1930 * not kernel_pmap. 1931 * 1932 * Caller holds pmap->pm_pteobj 1933 */ 1934 static void 1935 pmap_remove_page(struct pmap *pmap, vm_offset_t va) 1936 { 1937 pt_entry_t *pte; 1938 1939 pte = pmap_pte(pmap, va); 1940 if (pte == NULL) 1941 return; 1942 if ((*pte & VPTE_V) == 0) 1943 return; 1944 pmap_remove_pte(pmap, pte, 0, va); 1945 } 1946 1947 /* 1948 * Remove the given range of addresses from the specified map. 1949 * 1950 * It is assumed that the start and end are properly rounded to 1951 * the page size. 1952 * 1953 * This function may not be called from an interrupt if the pmap is 1954 * not kernel_pmap. 1955 * 1956 * No requirements. 1957 */ 1958 void 1959 pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) 1960 { 1961 vm_offset_t va_next; 1962 pml4_entry_t *pml4e; 1963 pdp_entry_t *pdpe; 1964 pd_entry_t ptpaddr, *pde; 1965 pt_entry_t *pte; 1966 vm_page_t pt_m; 1967 1968 if (pmap == NULL) 1969 return; 1970 1971 vm_object_hold(pmap->pm_pteobj); 1972 KKASSERT(pmap->pm_stats.resident_count >= 0); 1973 if (pmap->pm_stats.resident_count == 0) { 1974 vm_object_drop(pmap->pm_pteobj); 1975 return; 1976 } 1977 1978 /* 1979 * special handling of removing one page. a very 1980 * common operation and easy to short circuit some 1981 * code. 1982 */ 1983 if (sva + PAGE_SIZE == eva) { 1984 pde = pmap_pde(pmap, sva); 1985 if (pde && (*pde & VPTE_PS) == 0) { 1986 pmap_remove_page(pmap, sva); 1987 vm_object_drop(pmap->pm_pteobj); 1988 return; 1989 } 1990 } 1991 1992 for (; sva < eva; sva = va_next) { 1993 pml4e = pmap_pml4e(pmap, sva); 1994 if ((*pml4e & VPTE_V) == 0) { 1995 va_next = (sva + NBPML4) & ~PML4MASK; 1996 if (va_next < sva) 1997 va_next = eva; 1998 continue; 1999 } 2000 2001 pdpe = pmap_pml4e_to_pdpe(pml4e, sva); 2002 if ((*pdpe & VPTE_V) == 0) { 2003 va_next = (sva + NBPDP) & ~PDPMASK; 2004 if (va_next < sva) 2005 va_next = eva; 2006 continue; 2007 } 2008 2009 /* 2010 * Calculate index for next page table. 2011 */ 2012 va_next = (sva + NBPDR) & ~PDRMASK; 2013 if (va_next < sva) 2014 va_next = eva; 2015 2016 pde = pmap_pdpe_to_pde(pdpe, sva); 2017 ptpaddr = *pde; 2018 2019 /* 2020 * Weed out invalid mappings. 2021 */ 2022 if (ptpaddr == 0) 2023 continue; 2024 2025 /* 2026 * Check for large page. 
2027 */ 2028 if ((ptpaddr & VPTE_PS) != 0) { 2029 /* JG FreeBSD has more complex treatment here */ 2030 KKASSERT(*pde != 0); 2031 pmap_inval_pde(pde, pmap, sva); 2032 atomic_add_long(&pmap->pm_stats.resident_count, 2033 -NBPDR / PAGE_SIZE); 2034 continue; 2035 } 2036 2037 /* 2038 * Limit our scan to either the end of the va represented 2039 * by the current page table page, or to the end of the 2040 * range being removed. 2041 */ 2042 if (va_next > eva) 2043 va_next = eva; 2044 2045 /* 2046 * NOTE: pmap_remove_pte() can block. 2047 */ 2048 pt_m = pmap_hold_pt_page(pde, sva); 2049 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, 2050 sva += PAGE_SIZE) { 2051 if (*pte) { 2052 if (pmap_remove_pte(pmap, pte, 0, sva)) 2053 break; 2054 } 2055 } 2056 vm_page_unhold(pt_m); 2057 } 2058 vm_object_drop(pmap->pm_pteobj); 2059 } 2060 2061 /* 2062 * Removes this physical page from all physical maps in which it resides. 2063 * Reflects back modify bits to the pager. 2064 * 2065 * This routine may not be called from an interrupt. 2066 * 2067 * No requirements. 2068 */ 2069 static void 2070 pmap_remove_all(vm_page_t m) 2071 { 2072 pt_entry_t *pte, tpte; 2073 pv_entry_t pv; 2074 vm_object_t pmobj; 2075 pmap_t pmap; 2076 2077 #if defined(PMAP_DIAGNOSTIC) 2078 /* 2079 * XXX this makes pmap_page_protect(NONE) illegal for non-managed 2080 * pages! 2081 */ 2082 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { 2083 panic("pmap_page_protect: illegal for unmanaged page, va: 0x%08llx", (long long)VM_PAGE_TO_PHYS(m)); 2084 } 2085 #endif 2086 2087 restart: 2088 vm_page_spin_lock(m); 2089 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 2090 pmap = pv->pv_pmap; 2091 pmobj = pmap->pm_pteobj; 2092 2093 /* 2094 * Handle reversed lock ordering 2095 */ 2096 if (vm_object_hold_try(pmobj) == 0) { 2097 refcount_acquire(&pmobj->hold_count); 2098 vm_page_spin_unlock(m); 2099 vm_object_lock(pmobj); 2100 vm_page_spin_lock(m); 2101 if (pv != TAILQ_FIRST(&m->md.pv_list) || 2102 pmap != pv->pv_pmap || 2103 pmobj != pmap->pm_pteobj) { 2104 vm_page_spin_unlock(m); 2105 vm_object_drop(pmobj); 2106 goto restart; 2107 } 2108 } 2109 2110 KKASSERT(pmap->pm_stats.resident_count > 0); 2111 atomic_add_long(&pmap->pm_stats.resident_count, -1); 2112 2113 pte = pmap_pte(pmap, pv->pv_va); 2114 KKASSERT(pte != NULL); 2115 2116 tpte = pmap_inval_loadandclear(pte, pmap, pv->pv_va); 2117 if (tpte & VPTE_WIRED) 2118 atomic_add_long(&pmap->pm_stats.wired_count, -1); 2119 KKASSERT(pmap->pm_stats.wired_count >= 0); 2120 2121 if (tpte & VPTE_A) 2122 vm_page_flag_set(m, PG_REFERENCED); 2123 2124 /* 2125 * Update the vm_page_t clean and reference bits. 
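 * VPTE_M is folded into vm_page_dirty() only when pmap_track_modified()
 * says the mapping is one we track; VPTE_A was already reflected into
 * PG_REFERENCED above.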
2126 */ 2127 if (tpte & VPTE_M) { 2128 #if defined(PMAP_DIAGNOSTIC) 2129 if (pmap_nw_modified(tpte)) { 2130 kprintf( 2131 "pmap_remove_all: modified page not writable: va: 0x%lx, pte: 0x%lx\n", 2132 pv->pv_va, tpte); 2133 } 2134 #endif 2135 if (pmap_track_modified(pmap, pv->pv_va)) 2136 vm_page_dirty(m); 2137 } 2138 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2139 if (TAILQ_EMPTY(&m->md.pv_list)) 2140 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 2141 m->md.pv_list_count--; 2142 KKASSERT(m->md.pv_list_count >= 0); 2143 pv_entry_rb_tree_RB_REMOVE(&pmap->pm_pvroot, pv); 2144 atomic_add_int(&pmap->pm_generation, 1); 2145 vm_page_spin_unlock(m); 2146 pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem); 2147 free_pv_entry(pv); 2148 2149 vm_object_drop(pmobj); 2150 vm_page_spin_lock(m); 2151 } 2152 KKASSERT((m->flags & (PG_MAPPED|PG_WRITEABLE)) == 0); 2153 vm_page_spin_unlock(m); 2154 } 2155 2156 /* 2157 * Removes the page from a particular pmap 2158 */ 2159 void 2160 pmap_remove_specific(pmap_t pmap, vm_page_t m) 2161 { 2162 pt_entry_t *pte, tpte; 2163 pv_entry_t pv; 2164 2165 vm_object_hold(pmap->pm_pteobj); 2166 again: 2167 vm_page_spin_lock(m); 2168 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2169 if (pv->pv_pmap != pmap) 2170 continue; 2171 2172 KKASSERT(pmap->pm_stats.resident_count > 0); 2173 atomic_add_long(&pmap->pm_stats.resident_count, -1); 2174 2175 pte = pmap_pte(pmap, pv->pv_va); 2176 KKASSERT(pte != NULL); 2177 2178 tpte = pmap_inval_loadandclear(pte, pmap, pv->pv_va); 2179 if (tpte & VPTE_WIRED) 2180 atomic_add_long(&pmap->pm_stats.wired_count, -1); 2181 KKASSERT(pmap->pm_stats.wired_count >= 0); 2182 2183 if (tpte & VPTE_A) 2184 vm_page_flag_set(m, PG_REFERENCED); 2185 2186 /* 2187 * Update the vm_page_t clean and reference bits. 2188 */ 2189 if (tpte & VPTE_M) { 2190 if (pmap_track_modified(pmap, pv->pv_va)) 2191 vm_page_dirty(m); 2192 } 2193 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2194 pv_entry_rb_tree_RB_REMOVE(&pmap->pm_pvroot, pv); 2195 atomic_add_int(&pmap->pm_generation, 1); 2196 m->md.pv_list_count--; 2197 KKASSERT(m->md.pv_list_count >= 0); 2198 if (TAILQ_EMPTY(&m->md.pv_list)) 2199 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 2200 pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem); 2201 vm_page_spin_unlock(m); 2202 free_pv_entry(pv); 2203 goto again; 2204 } 2205 vm_page_spin_unlock(m); 2206 vm_object_drop(pmap->pm_pteobj); 2207 } 2208 2209 /* 2210 * Set the physical protection on the specified range of this map 2211 * as requested. 2212 * 2213 * This function may not be called from an interrupt if the map is 2214 * not the kernel_pmap. 2215 * 2216 * No requirements. 
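 *
 * For illustration only (hypothetical call site, not taken from this
 * file), a caller typically write-protects a user range roughly like:
 *
 *	pmap_protect(vmspace_pmap(vm), sva, eva,
 *		     VM_PROT_READ | VM_PROT_EXECUTE);
 *
 * A prot with neither read nor execute degenerates into pmap_remove(),
 * and a prot that still includes VM_PROT_WRITE is a no-op here, as the
 * code below shows.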
2217 */ 2218 void 2219 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 2220 { 2221 vm_offset_t va_next; 2222 pml4_entry_t *pml4e; 2223 pdp_entry_t *pdpe; 2224 pd_entry_t ptpaddr, *pde; 2225 pt_entry_t *pte; 2226 vm_page_t pt_m; 2227 2228 if (pmap == NULL) 2229 return; 2230 2231 if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == VM_PROT_NONE) { 2232 pmap_remove(pmap, sva, eva); 2233 return; 2234 } 2235 2236 if (prot & VM_PROT_WRITE) 2237 return; 2238 2239 vm_object_hold(pmap->pm_pteobj); 2240 2241 for (; sva < eva; sva = va_next) { 2242 pml4e = pmap_pml4e(pmap, sva); 2243 if ((*pml4e & VPTE_V) == 0) { 2244 va_next = (sva + NBPML4) & ~PML4MASK; 2245 if (va_next < sva) 2246 va_next = eva; 2247 continue; 2248 } 2249 2250 pdpe = pmap_pml4e_to_pdpe(pml4e, sva); 2251 if ((*pdpe & VPTE_V) == 0) { 2252 va_next = (sva + NBPDP) & ~PDPMASK; 2253 if (va_next < sva) 2254 va_next = eva; 2255 continue; 2256 } 2257 2258 va_next = (sva + NBPDR) & ~PDRMASK; 2259 if (va_next < sva) 2260 va_next = eva; 2261 2262 pde = pmap_pdpe_to_pde(pdpe, sva); 2263 ptpaddr = *pde; 2264 2265 #if 0 2266 /* 2267 * Check for large page. 2268 */ 2269 if ((ptpaddr & VPTE_PS) != 0) { 2270 /* JG correct? */ 2271 pmap_clean_pde(pde, pmap, sva); 2272 atomic_add_long(&pmap->pm_stats.resident_count, 2273 -NBPDR / PAGE_SIZE); 2274 continue; 2275 } 2276 #endif 2277 2278 /* 2279 * Weed out invalid mappings. Note: we assume that the page 2280 * directory table is always allocated, and in kernel virtual. 2281 */ 2282 if (ptpaddr == 0) 2283 continue; 2284 2285 if (va_next > eva) 2286 va_next = eva; 2287 2288 pt_m = pmap_hold_pt_page(pde, sva); 2289 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, 2290 sva += PAGE_SIZE) { 2291 /* 2292 * Clean managed pages and also check the accessed 2293 * bit. Just remove write perms for unmanaged 2294 * pages. Be careful of races, turning off write 2295 * access will force a fault rather then setting 2296 * the modified bit at an unexpected time. 2297 */ 2298 pmap_clean_pte(pte, pmap, sva, NULL); 2299 } 2300 vm_page_unhold(pt_m); 2301 } 2302 vm_object_drop(pmap->pm_pteobj); 2303 } 2304 2305 /* 2306 * Enter a managed page into a pmap. If the page is not wired related pmap 2307 * data can be destroyed at any time for later demand-operation. 2308 * 2309 * Insert the vm_page (m) at virtual address (v) in (pmap), with the 2310 * specified protection, and wire the mapping if requested. 2311 * 2312 * NOTE: This routine may not lazy-evaluate or lose information. The 2313 * page must actually be inserted into the given map NOW. 2314 * 2315 * NOTE: When entering a page at a KVA address, the pmap must be the 2316 * kernel_pmap. 2317 * 2318 * No requirements. 2319 */ 2320 void 2321 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 2322 boolean_t wired, vm_map_entry_t entry __unused) 2323 { 2324 vm_paddr_t pa; 2325 pv_entry_t pv; 2326 pt_entry_t *pte; 2327 pt_entry_t origpte, newpte; 2328 vm_paddr_t opa; 2329 vm_page_t mpte; 2330 2331 if (pmap == NULL) 2332 return; 2333 2334 va = trunc_page(va); 2335 2336 vm_object_hold(pmap->pm_pteobj); 2337 2338 /* 2339 * Get the page table page. The kernel_pmap's page table pages 2340 * are preallocated and have no associated vm_page_t. 2341 * 2342 * If not NULL, mpte will be busied and we must vm_page_wakeup() 2343 * to cleanup. There will already be at least one wire count from 2344 * it being mapped into its parent. 
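 * For user pmaps the pte is located by mapping the page table page
 * through the DMAP and indexing it with pmap_pte_index(va); the
 * kernel_pmap case can use vtopte() directly.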
2345 */ 2346 if (pmap == &kernel_pmap) { 2347 mpte = NULL; 2348 pte = vtopte(va); 2349 } else { 2350 mpte = pmap_allocpte(pmap, va); 2351 pte = (void *)PHYS_TO_DMAP(mpte->phys_addr); 2352 pte += pmap_pte_index(va); 2353 } 2354 2355 /* 2356 * Deal with races against the kernel's real MMU by cleaning the 2357 * page, even if we are re-entering the same page. 2358 */ 2359 pa = VM_PAGE_TO_PHYS(m); 2360 origpte = pmap_inval_loadandclear(pte, pmap, va); 2361 /*origpte = pmap_clean_pte(pte, pmap, va, NULL);*/ 2362 opa = origpte & VPTE_FRAME; 2363 2364 if (origpte & VPTE_PS) 2365 panic("pmap_enter: attempted pmap_enter on 2MB page"); 2366 2367 if ((origpte & (VPTE_MANAGED|VPTE_M)) == (VPTE_MANAGED|VPTE_M)) { 2368 if (pmap_track_modified(pmap, va)) { 2369 vm_page_t om = PHYS_TO_VM_PAGE(opa); 2370 vm_page_dirty(om); 2371 } 2372 } 2373 2374 /* 2375 * Mapping has not changed, must be protection or wiring change. 2376 */ 2377 if (origpte && (opa == pa)) { 2378 /* 2379 * Wiring change, just update stats. We don't worry about 2380 * wiring PT pages as they remain resident as long as there 2381 * are valid mappings in them. Hence, if a user page is wired, 2382 * the PT page will be also. 2383 */ 2384 if (wired && ((origpte & VPTE_WIRED) == 0)) 2385 atomic_add_long(&pmap->pm_stats.wired_count, 1); 2386 else if (!wired && (origpte & VPTE_WIRED)) 2387 atomic_add_long(&pmap->pm_stats.wired_count, -1); 2388 2389 if (origpte & VPTE_MANAGED) { 2390 pa |= VPTE_MANAGED; 2391 KKASSERT(m->flags & PG_MAPPED); 2392 KKASSERT(!(m->flags & (PG_FICTITIOUS|PG_UNMANAGED))); 2393 } else { 2394 KKASSERT((m->flags & (PG_FICTITIOUS|PG_UNMANAGED))); 2395 } 2396 vm_page_spin_lock(m); 2397 goto validate; 2398 } 2399 2400 /* 2401 * Bump the wire_count for the page table page. 2402 */ 2403 if (mpte) 2404 vm_page_wire_quick(mpte); 2405 2406 /* 2407 * Mapping has changed, invalidate old range and fall through to 2408 * handle validating new mapping. Don't inherit anything from 2409 * oldpte. 2410 */ 2411 if (opa) { 2412 int err; 2413 err = pmap_remove_pte(pmap, NULL, origpte, va); 2414 origpte = 0; 2415 if (err) 2416 panic("pmap_enter: pte vanished, va: 0x%lx", va); 2417 } 2418 2419 /* 2420 * Enter on the PV list if part of our managed memory. Note that we 2421 * raise IPL while manipulating pv_table since pmap_enter can be 2422 * called at interrupt time. 2423 */ 2424 if (pmap_initialized) { 2425 if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { 2426 /* 2427 * WARNING! We are using m's spin-lock as a 2428 * man's pte lock to interlock against 2429 * pmap_page_protect() operations. 2430 * 2431 * This is a bad hack (obviously). 2432 */ 2433 pv = get_pv_entry(); 2434 vm_page_spin_lock(m); 2435 pmap_insert_entry(pmap, va, mpte, m, pv); 2436 pa |= VPTE_MANAGED; 2437 /* vm_page_spin_unlock(m); */ 2438 } else { 2439 vm_page_spin_lock(m); 2440 } 2441 } else { 2442 vm_page_spin_lock(m); 2443 } 2444 2445 /* 2446 * Increment counters 2447 */ 2448 atomic_add_long(&pmap->pm_stats.resident_count, 1); 2449 if (wired) 2450 atomic_add_long(&pmap->pm_stats.wired_count, 1); 2451 2452 validate: 2453 /* 2454 * Now validate mapping with desired protection/wiring. 
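 * The new pte is composed from the physical address, the protection
 * bits from pte_prot(), VPTE_V, VPTE_U and VPTE_A, with VPTE_WIRED
 * or'd in for wired mappings.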
2455 */ 2456 newpte = (pt_entry_t)(pa | pte_prot(pmap, prot) | VPTE_V | VPTE_U); 2457 newpte |= VPTE_A; 2458 2459 if (wired) 2460 newpte |= VPTE_WIRED; 2461 // if (pmap != &kernel_pmap) 2462 newpte |= VPTE_U; 2463 if (newpte & VPTE_RW) 2464 vm_page_flag_set(m, PG_WRITEABLE); 2465 KKASSERT((newpte & VPTE_MANAGED) == 0 || (m->flags & PG_MAPPED)); 2466 2467 origpte = atomic_swap_long(pte, newpte); 2468 if (origpte & VPTE_M) { 2469 kprintf("pmap [M] race @ %016jx\n", va); 2470 atomic_set_long(pte, VPTE_M); 2471 } 2472 vm_page_spin_unlock(m); 2473 2474 if (mpte) 2475 vm_page_wakeup(mpte); 2476 vm_object_drop(pmap->pm_pteobj); 2477 } 2478 2479 /* 2480 * This code works like pmap_enter() but assumes VM_PROT_READ and not-wired. 2481 * 2482 * Currently this routine may only be used on user pmaps, not kernel_pmap. 2483 * 2484 * No requirements. 2485 */ 2486 void 2487 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m) 2488 { 2489 pmap_enter(pmap, va, m, VM_PROT_READ, 0, NULL); 2490 } 2491 2492 /* 2493 * Make a temporary mapping for a physical address. This is only intended 2494 * to be used for panic dumps. 2495 * 2496 * The caller is responsible for calling smp_invltlb(). 2497 */ 2498 void * 2499 pmap_kenter_temporary(vm_paddr_t pa, long i) 2500 { 2501 pmap_kenter_quick(crashdumpmap + (i * PAGE_SIZE), pa); 2502 return ((void *)crashdumpmap); 2503 } 2504 2505 #define MAX_INIT_PT (96) 2506 2507 /* 2508 * This routine preloads the ptes for a given object into the specified pmap. 2509 * This eliminates the blast of soft faults on process startup and 2510 * immediately after an mmap. 2511 * 2512 * No requirements. 2513 */ 2514 static int pmap_object_init_pt_callback(vm_page_t p, void *data); 2515 2516 void 2517 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot, 2518 vm_object_t object, vm_pindex_t pindex, 2519 vm_size_t size, int limit) 2520 { 2521 struct rb_vm_page_scan_info info; 2522 struct lwp *lp; 2523 vm_size_t psize; 2524 2525 /* 2526 * We can't preinit if read access isn't set or there is no pmap 2527 * or object. 2528 */ 2529 if ((prot & VM_PROT_READ) == 0 || pmap == NULL || object == NULL) 2530 return; 2531 2532 /* 2533 * We can't preinit if the pmap is not the current pmap 2534 */ 2535 lp = curthread->td_lwp; 2536 if (lp == NULL || pmap != vmspace_pmap(lp->lwp_vmspace)) 2537 return; 2538 2539 /* 2540 * Misc additional checks 2541 */ 2542 psize = x86_64_btop(size); 2543 2544 if ((object->type != OBJT_VNODE) || 2545 ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) && 2546 (object->resident_page_count > MAX_INIT_PT))) { 2547 return; 2548 } 2549 2550 if (psize + pindex > object->size) { 2551 if (object->size < pindex) 2552 return; 2553 psize = object->size - pindex; 2554 } 2555 2556 if (psize == 0) 2557 return; 2558 2559 /* 2560 * Use a red-black scan to traverse the requested range and load 2561 * any valid pages found into the pmap. 2562 * 2563 * We cannot safely scan the object's memq unless we are in a 2564 * critical section since interrupts can remove pages from objects. 
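 * The scan below covers [pindex, pindex + psize - 1]; the callback
 * aborts the whole scan by returning a negative value when free pages
 * run low, and skips list markers and pages it cannot immediately busy.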
2565 */ 2566 info.start_pindex = pindex; 2567 info.end_pindex = pindex + psize - 1; 2568 info.limit = limit; 2569 info.mpte = NULL; 2570 info.addr = addr; 2571 info.pmap = pmap; 2572 2573 vm_object_hold_shared(object); 2574 vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp, 2575 pmap_object_init_pt_callback, &info); 2576 vm_object_drop(object); 2577 } 2578 2579 static 2580 int 2581 pmap_object_init_pt_callback(vm_page_t p, void *data) 2582 { 2583 struct rb_vm_page_scan_info *info = data; 2584 vm_pindex_t rel_index; 2585 /* 2586 * don't allow an madvise to blow away our really 2587 * free pages allocating pv entries. 2588 */ 2589 if ((info->limit & MAP_PREFAULT_MADVISE) && 2590 vmstats.v_free_count < vmstats.v_free_reserved) { 2591 return(-1); 2592 } 2593 2594 /* 2595 * Ignore list markers and ignore pages we cannot instantly 2596 * busy (while holding the object token). 2597 */ 2598 if (p->flags & PG_MARKER) 2599 return 0; 2600 if (vm_page_busy_try(p, TRUE)) 2601 return 0; 2602 if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && 2603 (p->flags & PG_FICTITIOUS) == 0) { 2604 if ((p->queue - p->pc) == PQ_CACHE) 2605 vm_page_deactivate(p); 2606 rel_index = p->pindex - info->start_pindex; 2607 pmap_enter_quick(info->pmap, 2608 info->addr + x86_64_ptob(rel_index), p); 2609 } 2610 vm_page_wakeup(p); 2611 return(0); 2612 } 2613 2614 /* 2615 * Return TRUE if the pmap is in shape to trivially 2616 * pre-fault the specified address. 2617 * 2618 * Returns FALSE if it would be non-trivial or if a 2619 * pte is already loaded into the slot. 2620 * 2621 * No requirements. 2622 */ 2623 int 2624 pmap_prefault_ok(pmap_t pmap, vm_offset_t addr) 2625 { 2626 pt_entry_t *pte; 2627 pd_entry_t *pde; 2628 int ret; 2629 2630 vm_object_hold(pmap->pm_pteobj); 2631 pde = pmap_pde(pmap, addr); 2632 if (pde == NULL || *pde == 0) { 2633 ret = 0; 2634 } else { 2635 pte = pmap_pde_to_pte(pde, addr); 2636 ret = (*pte) ? 0 : 1; 2637 } 2638 vm_object_drop(pmap->pm_pteobj); 2639 2640 return (ret); 2641 } 2642 2643 /* 2644 * Change the wiring attribute for a map/virtual-address pair. 2645 * 2646 * The mapping must already exist in the pmap. 2647 * No other requirements. 2648 */ 2649 vm_page_t 2650 pmap_unwire(pmap_t pmap, vm_offset_t va) 2651 { 2652 pt_entry_t *pte; 2653 vm_paddr_t pa; 2654 vm_page_t m; 2655 2656 if (pmap == NULL) 2657 return NULL; 2658 2659 vm_object_hold(pmap->pm_pteobj); 2660 pte = pmap_pte(pmap, va); 2661 2662 if (pte == NULL || (*pte & VPTE_V) == 0) { 2663 vm_object_drop(pmap->pm_pteobj); 2664 return NULL; 2665 } 2666 2667 /* 2668 * Wiring is not a hardware characteristic so there is no need to 2669 * invalidate TLB. However, in an SMP environment we must use 2670 * a locked bus cycle to update the pte (if we are not using 2671 * the pmap_inval_*() API that is)... it's ok to do this for simple 2672 * wiring changes. 2673 */ 2674 if (pmap_pte_w(pte)) 2675 atomic_add_long(&pmap->pm_stats.wired_count, -1); 2676 /* XXX else return NULL so caller doesn't unwire m ? */ 2677 atomic_clear_long(pte, VPTE_WIRED); 2678 2679 pa = *pte & VPTE_FRAME; 2680 m = PHYS_TO_VM_PAGE(pa); /* held by wired count */ 2681 2682 vm_object_drop(pmap->pm_pteobj); 2683 2684 return m; 2685 } 2686 2687 /* 2688 * Copy the range specified by src_addr/len 2689 * from the source map to the range dst_addr/len 2690 * in the destination map. 2691 * 2692 * This routine is only advisory and need not do anything. 
 */
void
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
	  vm_size_t len, vm_offset_t src_addr)
{
	/*
	 * XXX BUGGY.  Among other things srcmpte is assumed to remain
	 * valid through blocking calls, and that's just not going to
	 * be the case.
	 *
	 * FIXME!
	 */
	return;
}

/*
 * pmap_zero_page:
 *
 *	Zero the specified physical page.
 *
 *	This function may be called from an interrupt and no locking is
 *	required.
 */
void
pmap_zero_page(vm_paddr_t phys)
{
	vm_offset_t va = PHYS_TO_DMAP(phys);

	bzero((void *)va, PAGE_SIZE);
}

/*
 * pmap_zero_page_area:
 *
 *	Zero part of a physical page by mapping it into memory and clearing
 *	its contents with bzero.
 *
 *	off and size may not cover an area beyond a single hardware page.
 */
void
pmap_zero_page_area(vm_paddr_t phys, int off, int size)
{
	vm_offset_t virt = PHYS_TO_DMAP(phys);

	bzero((char *)virt + off, size);
}

/*
 * pmap_copy_page:
 *
 *	Copy the physical page from the source PA to the target PA.
 *	This function may be called from an interrupt.  No locking
 *	is required.
 */
void
pmap_copy_page(vm_paddr_t src, vm_paddr_t dst)
{
	vm_offset_t src_virt, dst_virt;

	src_virt = PHYS_TO_DMAP(src);
	dst_virt = PHYS_TO_DMAP(dst);
	bcopy((void *)src_virt, (void *)dst_virt, PAGE_SIZE);
}

/*
 * pmap_copy_page_frag:
 *
 *	Copy part of the physical page from the source PA to the target PA.
 *	This function may be called from an interrupt.  No locking
 *	is required.
 */
void
pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes)
{
	vm_offset_t src_virt, dst_virt;

	src_virt = PHYS_TO_DMAP(src);
	dst_virt = PHYS_TO_DMAP(dst);
	bcopy((char *)src_virt + (src & PAGE_MASK),
	      (char *)dst_virt + (dst & PAGE_MASK),
	      bytes);
}

/*
 * Returns true if the pmap's pv is one of the first 16 pvs linked to
 * from this page.  This count may be changed upwards or downwards
 * in the future; it is only necessary that true be returned for a small
 * subset of pmaps for proper page aging.
 *
 * No other requirements.
 */
boolean_t
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
{
	pv_entry_t pv;
	int loops = 0;

	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
		return FALSE;

	vm_page_spin_lock(m);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		if (pv->pv_pmap == pmap) {
			vm_page_spin_unlock(m);
			return TRUE;
		}
		loops++;
		if (loops >= 16)
			break;
	}
	vm_page_spin_unlock(m);

	return (FALSE);
}

/*
 * Remove all pages from the specified address space; this aids process
 * exit speeds.  Also, this code is special cased for the current
 * process only, but can have the more generic (and slightly slower)
 * mode enabled.  This is much faster than pmap_remove in the case
 * of running down an entire address space.
 *
 * No other requirements.
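 *
 * As a sketch only (hypothetical call site and constants, not taken
 * from this file), the process exit path would run down an entire user
 * address space with something like:
 *
 *	pmap_remove_pages(vmspace_pmap(vm),
 *			  VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);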
2816 */ 2817 void 2818 pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 2819 { 2820 pmap_remove(pmap, sva, eva); 2821 #if 0 2822 pt_entry_t *pte, tpte; 2823 pv_entry_t pv, npv; 2824 vm_page_t m; 2825 int save_generation; 2826 2827 if (pmap->pm_pteobj) 2828 vm_object_hold(pmap->pm_pteobj); 2829 2830 pmap_invalidate_range(pmap, sva, eva); 2831 2832 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { 2833 if (pv->pv_va >= eva || pv->pv_va < sva) { 2834 npv = TAILQ_NEXT(pv, pv_plist); 2835 continue; 2836 } 2837 2838 KKASSERT(pmap == pv->pv_pmap); 2839 2840 pte = pmap_pte(pmap, pv->pv_va); 2841 2842 /* 2843 * We cannot remove wired pages from a process' mapping 2844 * at this time 2845 */ 2846 if (*pte & VPTE_WIRED) { 2847 npv = TAILQ_NEXT(pv, pv_plist); 2848 continue; 2849 } 2850 tpte = pmap_inval_loadandclear(pte, pmap, pv->pv_va); 2851 2852 m = PHYS_TO_VM_PAGE(tpte & VPTE_FRAME); 2853 vm_page_spin_lock(m); 2854 2855 KASSERT(m < &vm_page_array[vm_page_array_size], 2856 ("pmap_remove_pages: bad tpte %lx", tpte)); 2857 2858 KKASSERT(pmap->pm_stats.resident_count > 0); 2859 atomic_add_long(&pmap->pm_stats.resident_count, -1); 2860 2861 /* 2862 * Update the vm_page_t clean and reference bits. 2863 */ 2864 if (tpte & VPTE_M) { 2865 vm_page_dirty(m); 2866 } 2867 2868 npv = TAILQ_NEXT(pv, pv_plist); 2869 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 2870 atomic_add_int(&pmap->pm_generation, 1); 2871 save_generation = pmap->pm_generation; 2872 m->md.pv_list_count--; 2873 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2874 if (TAILQ_EMPTY(&m->md.pv_list)) 2875 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 2876 vm_page_spin_unlock(m); 2877 2878 pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem); 2879 free_pv_entry(pv); 2880 2881 /* 2882 * Restart the scan if we blocked during the unuse or free 2883 * calls and other removals were made. 2884 */ 2885 if (save_generation != pmap->pm_generation) { 2886 kprintf("Warning: pmap_remove_pages race-A avoided\n"); 2887 npv = TAILQ_FIRST(&pmap->pm_pvlist); 2888 } 2889 } 2890 if (pmap->pm_pteobj) 2891 vm_object_drop(pmap->pm_pteobj); 2892 pmap_remove(pmap, sva, eva); 2893 #endif 2894 } 2895 2896 /* 2897 * pmap_testbit tests bits in active mappings of a VM page. 2898 */ 2899 static boolean_t 2900 pmap_testbit(vm_page_t m, int bit) 2901 { 2902 pv_entry_t pv; 2903 pt_entry_t *pte; 2904 2905 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 2906 return FALSE; 2907 2908 if (TAILQ_FIRST(&m->md.pv_list) == NULL) 2909 return FALSE; 2910 2911 vm_page_spin_lock(m); 2912 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2913 /* 2914 * if the bit being tested is the modified bit, then 2915 * mark clean_map and ptes as never 2916 * modified. 2917 */ 2918 if (bit & (VPTE_A|VPTE_M)) { 2919 if (!pmap_track_modified(pv->pv_pmap, pv->pv_va)) 2920 continue; 2921 } 2922 2923 #if defined(PMAP_DIAGNOSTIC) 2924 if (pv->pv_pmap == NULL) { 2925 kprintf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va); 2926 continue; 2927 } 2928 #endif 2929 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2930 if (*pte & bit) { 2931 vm_page_spin_unlock(m); 2932 return TRUE; 2933 } 2934 } 2935 vm_page_spin_unlock(m); 2936 return (FALSE); 2937 } 2938 2939 /* 2940 * This routine is used to clear bits in ptes. Certain bits require special 2941 * handling, in particular (on virtual kernels) the VPTE_M (modify) bit. 2942 * 2943 * This routine is only called with certain VPTE_* bit combinations. 
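 * The callers visible in this file pass VPTE_RW (pmap_page_protect()
 * and pmap_clear_modify()) or VPTE_A (pmap_clear_reference()); the code
 * below also handles a bare VPTE_M clear, while the combined
 * VPTE_RW|VPTE_M case is not expected and panics.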
2944 */ 2945 static __inline void 2946 pmap_clearbit(vm_page_t m, int bit) 2947 { 2948 pv_entry_t pv; 2949 pt_entry_t *pte; 2950 pt_entry_t pbits; 2951 vm_object_t pmobj; 2952 pmap_t pmap; 2953 2954 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { 2955 if (bit == VPTE_RW) 2956 vm_page_flag_clear(m, PG_WRITEABLE); 2957 return; 2958 } 2959 2960 /* 2961 * Loop over all current mappings setting/clearing as appropos If 2962 * setting RO do we need to clear the VAC? 2963 */ 2964 restart: 2965 vm_page_spin_lock(m); 2966 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2967 /* 2968 * Need the pmap object lock(?) 2969 */ 2970 pmap = pv->pv_pmap; 2971 pmobj = pmap->pm_pteobj; 2972 2973 if (vm_object_hold_try(pmobj) == 0) { 2974 refcount_acquire(&pmobj->hold_count); 2975 vm_page_spin_unlock(m); 2976 vm_object_lock(pmobj); 2977 vm_object_drop(pmobj); 2978 goto restart; 2979 } 2980 2981 /* 2982 * don't write protect pager mappings 2983 */ 2984 if (bit == VPTE_RW) { 2985 if (!pmap_track_modified(pv->pv_pmap, pv->pv_va)) { 2986 vm_object_drop(pmobj); 2987 continue; 2988 } 2989 } 2990 2991 #if defined(PMAP_DIAGNOSTIC) 2992 if (pv->pv_pmap == NULL) { 2993 kprintf("Null pmap (cb) at va: 0x%lx\n", pv->pv_va); 2994 vm_object_drop(pmobj); 2995 continue; 2996 } 2997 #endif 2998 2999 /* 3000 * Careful here. We can use a locked bus instruction to 3001 * clear VPTE_A or VPTE_M safely but we need to synchronize 3002 * with the target cpus when we mess with VPTE_RW. 3003 * 3004 * On virtual kernels we must force a new fault-on-write 3005 * in the real kernel if we clear the Modify bit ourselves, 3006 * otherwise the real kernel will not get a new fault and 3007 * will never set our Modify bit again. 3008 */ 3009 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 3010 if (*pte & bit) { 3011 if (bit == VPTE_RW) { 3012 /* 3013 * We must also clear VPTE_M when clearing 3014 * VPTE_RW and synchronize its state to 3015 * the page. 3016 */ 3017 pbits = pmap_clean_pte(pte, pv->pv_pmap, 3018 pv->pv_va, m); 3019 } else if (bit == VPTE_M) { 3020 /* 3021 * We must invalidate the real-kernel pte 3022 * when clearing VPTE_M bit to force the 3023 * real-kernel to take a new fault to re-set 3024 * VPTE_M. 3025 */ 3026 atomic_clear_long(pte, VPTE_M); 3027 if (*pte & VPTE_RW) { 3028 pmap_invalidate_range(pv->pv_pmap, 3029 pv->pv_va, 3030 pv->pv_va + PAGE_SIZE); 3031 } 3032 } else if ((bit & (VPTE_RW|VPTE_M)) == 3033 (VPTE_RW|VPTE_M)) { 3034 /* 3035 * We've been asked to clear W & M, I guess 3036 * the caller doesn't want us to update 3037 * the dirty status of the VM page. 3038 */ 3039 pmap_clean_pte(pte, pv->pv_pmap, pv->pv_va, m); 3040 panic("shouldn't be called"); 3041 } else { 3042 /* 3043 * We've been asked to clear bits that do 3044 * not interact with hardware. 3045 */ 3046 atomic_clear_long(pte, bit); 3047 } 3048 } 3049 vm_object_drop(pmobj); 3050 } 3051 if (bit == VPTE_RW) 3052 vm_page_flag_clear(m, PG_WRITEABLE); 3053 vm_page_spin_unlock(m); 3054 } 3055 3056 /* 3057 * Lower the permission for all mappings to a given page. 3058 * 3059 * No other requirements. 3060 */ 3061 void 3062 pmap_page_protect(vm_page_t m, vm_prot_t prot) 3063 { 3064 if ((prot & VM_PROT_WRITE) == 0) { 3065 if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { 3066 pmap_clearbit(m, VPTE_RW); 3067 } else { 3068 pmap_remove_all(m); 3069 } 3070 } 3071 } 3072 3073 vm_paddr_t 3074 pmap_phys_address(vm_pindex_t ppn) 3075 { 3076 return (x86_64_ptob(ppn)); 3077 } 3078 3079 /* 3080 * Return a count of reference bits for a page, clearing those bits. 
3081 * It is not necessary for every reference bit to be cleared, but it 3082 * is necessary that 0 only be returned when there are truly no 3083 * reference bits set. 3084 * 3085 * XXX: The exact number of bits to check and clear is a matter that 3086 * should be tested and standardized at some point in the future for 3087 * optimal aging of shared pages. 3088 * 3089 * No other requirements. 3090 */ 3091 int 3092 pmap_ts_referenced(vm_page_t m) 3093 { 3094 pv_entry_t pv, pvf, pvn; 3095 pt_entry_t *pte; 3096 int rtval = 0; 3097 3098 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 3099 return (rtval); 3100 3101 vm_page_spin_lock(m); 3102 if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 3103 pvf = pv; 3104 do { 3105 pvn = TAILQ_NEXT(pv, pv_list); 3106 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 3107 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 3108 3109 if (!pmap_track_modified(pv->pv_pmap, pv->pv_va)) 3110 continue; 3111 3112 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 3113 3114 if (pte && (*pte & VPTE_A)) { 3115 atomic_clear_long(pte, VPTE_A); 3116 rtval++; 3117 if (rtval > 4) { 3118 break; 3119 } 3120 } 3121 } while ((pv = pvn) != NULL && pv != pvf); 3122 } 3123 vm_page_spin_unlock(m); 3124 3125 return (rtval); 3126 } 3127 3128 /* 3129 * Return whether or not the specified physical page was modified 3130 * in any physical maps. 3131 * 3132 * No other requirements. 3133 */ 3134 boolean_t 3135 pmap_is_modified(vm_page_t m) 3136 { 3137 boolean_t res; 3138 3139 res = pmap_testbit(m, VPTE_M); 3140 3141 return (res); 3142 } 3143 3144 /* 3145 * Clear the modify bits on the specified physical page. For the vkernel 3146 * we really need to clean the page, which clears VPTE_RW and VPTE_M, in 3147 * order to ensure that we take a fault on the next write to the page. 3148 * Otherwise the page may become dirty without us knowing it. 3149 * 3150 * No other requirements. 3151 */ 3152 void 3153 pmap_clear_modify(vm_page_t m) 3154 { 3155 pmap_clearbit(m, VPTE_RW); 3156 } 3157 3158 /* 3159 * Clear the reference bit on the specified physical page. 3160 * 3161 * No other requirements. 3162 */ 3163 void 3164 pmap_clear_reference(vm_page_t m) 3165 { 3166 pmap_clearbit(m, VPTE_A); 3167 } 3168 3169 /* 3170 * Miscellaneous support routines follow 3171 */ 3172 static void 3173 i386_protection_init(void) 3174 { 3175 uint64_t *kp; 3176 int prot; 3177 3178 kp = protection_codes; 3179 for (prot = 0; prot < 8; prot++) { 3180 if (prot & VM_PROT_READ) 3181 *kp |= 0; /* R */ 3182 if (prot & VM_PROT_WRITE) 3183 *kp |= VPTE_RW; /* R+W */ 3184 if (prot && (prot & VM_PROT_EXECUTE) == 0) 3185 *kp |= VPTE_NX; /* NX - !executable */ 3186 ++kp; 3187 } 3188 } 3189 3190 /* 3191 * Sets the memory attribute for the specified page. 3192 */ 3193 void 3194 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) 3195 { 3196 /* This is a vkernel, do nothing */ 3197 } 3198 3199 /* 3200 * Change the PAT attribute on an existing kernel memory map. Caller 3201 * must ensure that the virtual memory in question is not accessed 3202 * during the adjustment. 3203 */ 3204 void 3205 pmap_change_attr(vm_offset_t va, vm_size_t count, int mode) 3206 { 3207 /* This is a vkernel, do nothing */ 3208 } 3209 3210 /* 3211 * Perform the pmap work for mincore 3212 * 3213 * No other requirements. 
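 * The returned value is a mask of MINCORE_INCORE plus the
 * MINCORE_MODIFIED/MINCORE_REFERENCED bits (and their _OTHER variants)
 * derived from the pte and the vm_page_t state, as assembled below.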
 */
int
pmap_mincore(pmap_t pmap, vm_offset_t addr)
{
	pt_entry_t *ptep, pte;
	vm_page_t m;
	int val = 0;

	vm_object_hold(pmap->pm_pteobj);
	ptep = pmap_pte(pmap, addr);

	if (ptep && (pte = *ptep) != 0) {
		vm_paddr_t pa;

		val = MINCORE_INCORE;
		if ((pte & VPTE_MANAGED) == 0)
			goto done;

		pa = pte & VPTE_FRAME;

		m = PHYS_TO_VM_PAGE(pa);

		/*
		 * Modified by us
		 */
		if (pte & VPTE_M)
			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
		/*
		 * Modified by someone
		 */
		else if (m->dirty || pmap_is_modified(m))
			val |= MINCORE_MODIFIED_OTHER;
		/*
		 * Referenced by us
		 */
		if (pte & VPTE_A)
			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;

		/*
		 * Referenced by someone
		 */
		else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) {
			val |= MINCORE_REFERENCED_OTHER;
			vm_page_flag_set(m, PG_REFERENCED);
		}
	}
done:
	vm_object_drop(pmap->pm_pteobj);

	return val;
}

/*
 * Replace p->p_vmspace with a new one.  If adjrefs is non-zero the new
 * vmspace will be ref'd and the old one will be deref'd.
 *
 * Caller must hold vmspace->vm_map.token for oldvm and newvm.
 */
void
pmap_replacevm(struct proc *p, struct vmspace *newvm, int adjrefs)
{
	struct vmspace *oldvm;
	struct lwp *lp;

	oldvm = p->p_vmspace;
	if (oldvm != newvm) {
		if (adjrefs)
			vmspace_ref(newvm);
		KKASSERT((newvm->vm_refcnt & VM_REF_DELETED) == 0);
		p->p_vmspace = newvm;
		KKASSERT(p->p_nthreads == 1);
		lp = RB_ROOT(&p->p_lwp_tree);
		pmap_setlwpvm(lp, newvm);
		if (adjrefs)
			vmspace_rel(oldvm);
	}
}

/*
 * Set the vmspace for a LWP.  The vmspace is almost universally set the
 * same as the process vmspace, but virtual kernels need to swap out contexts
 * on a per-lwp basis.
 */
void
pmap_setlwpvm(struct lwp *lp, struct vmspace *newvm)
{
	struct vmspace *oldvm;
	struct pmap *pmap;

	oldvm = lp->lwp_vmspace;
	if (oldvm != newvm) {
		crit_enter();
		KKASSERT((newvm->vm_refcnt & VM_REF_DELETED) == 0);
		lp->lwp_vmspace = newvm;
		if (curthread->td_lwp == lp) {
			pmap = vmspace_pmap(newvm);
			ATOMIC_CPUMASK_ORBIT(pmap->pm_active, mycpu->gd_cpuid);
			if (pmap->pm_active_lock & CPULOCK_EXCL)
				pmap_interlock_wait(newvm);
#if defined(SWTCH_OPTIM_STATS)
			tlb_flush_count++;
#endif
			pmap = vmspace_pmap(oldvm);
			ATOMIC_CPUMASK_NANDBIT(pmap->pm_active,
					       mycpu->gd_cpuid);
		}
		crit_exit();
	}
}

/*
 * The swtch code tried to switch in a heavyweight process whose pmap
 * is locked by another cpu.  We have to wait for the lock to clear before
 * the pmap can be used.
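 *
 * Because the vkernel runs in userland, the wait below is a
 * pthread_yield() spin on pm_active_lock rather than a hardware
 * busy-wait.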
3328 */ 3329 void 3330 pmap_interlock_wait (struct vmspace *vm) 3331 { 3332 pmap_t pmap = vmspace_pmap(vm); 3333 3334 if (pmap->pm_active_lock & CPULOCK_EXCL) { 3335 crit_enter(); 3336 while (pmap->pm_active_lock & CPULOCK_EXCL) { 3337 cpu_ccfence(); 3338 pthread_yield(); 3339 } 3340 crit_exit(); 3341 } 3342 } 3343 3344 vm_offset_t 3345 pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) 3346 { 3347 3348 if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { 3349 return addr; 3350 } 3351 3352 addr = roundup2(addr, NBPDR); 3353 return addr; 3354 } 3355 3356 /* 3357 * Used by kmalloc/kfree, page already exists at va 3358 */ 3359 vm_page_t 3360 pmap_kvtom(vm_offset_t va) 3361 { 3362 vpte_t *ptep; 3363 3364 KKASSERT(va >= KvaStart && va < KvaEnd); 3365 ptep = vtopte(va); 3366 return(PHYS_TO_VM_PAGE(*ptep & PG_FRAME)); 3367 } 3368 3369 void 3370 pmap_object_init(vm_object_t object) 3371 { 3372 /* empty */ 3373 } 3374 3375 void 3376 pmap_object_free(vm_object_t object) 3377 { 3378 /* empty */ 3379 } 3380 3381 void 3382 pmap_pgscan(struct pmap_pgscan_info *pginfo) 3383 { 3384 pmap_t pmap = pginfo->pmap; 3385 vm_offset_t sva = pginfo->beg_addr; 3386 vm_offset_t eva = pginfo->end_addr; 3387 vm_offset_t va_next; 3388 pml4_entry_t *pml4e; 3389 pdp_entry_t *pdpe; 3390 pd_entry_t ptpaddr, *pde; 3391 pt_entry_t *pte; 3392 vm_page_t pt_m; 3393 int stop = 0; 3394 3395 vm_object_hold(pmap->pm_pteobj); 3396 3397 for (; sva < eva; sva = va_next) { 3398 if (stop) 3399 break; 3400 3401 pml4e = pmap_pml4e(pmap, sva); 3402 if ((*pml4e & VPTE_V) == 0) { 3403 va_next = (sva + NBPML4) & ~PML4MASK; 3404 if (va_next < sva) 3405 va_next = eva; 3406 continue; 3407 } 3408 3409 pdpe = pmap_pml4e_to_pdpe(pml4e, sva); 3410 if ((*pdpe & VPTE_V) == 0) { 3411 va_next = (sva + NBPDP) & ~PDPMASK; 3412 if (va_next < sva) 3413 va_next = eva; 3414 continue; 3415 } 3416 3417 va_next = (sva + NBPDR) & ~PDRMASK; 3418 if (va_next < sva) 3419 va_next = eva; 3420 3421 pde = pmap_pdpe_to_pde(pdpe, sva); 3422 ptpaddr = *pde; 3423 3424 #if 0 3425 /* 3426 * Check for large page (ignore). 3427 */ 3428 if ((ptpaddr & VPTE_PS) != 0) { 3429 #if 0 3430 pmap_clean_pde(pde, pmap, sva); 3431 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 3432 #endif 3433 continue; 3434 } 3435 #endif 3436 3437 /* 3438 * Weed out invalid mappings. Note: we assume that the page 3439 * directory table is always allocated, and in kernel virtual. 3440 */ 3441 if (ptpaddr == 0) 3442 continue; 3443 3444 if (va_next > eva) 3445 va_next = eva; 3446 3447 pt_m = pmap_hold_pt_page(pde, sva); 3448 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, 3449 sva += PAGE_SIZE) { 3450 vm_page_t m; 3451 3452 if (stop) 3453 break; 3454 if ((*pte & VPTE_MANAGED) == 0) 3455 continue; 3456 3457 m = PHYS_TO_VM_PAGE(*pte & VPTE_FRAME); 3458 if (vm_page_busy_try(m, TRUE) == 0) { 3459 if (pginfo->callback(pginfo, sva, m) < 0) 3460 stop = 1; 3461 } 3462 } 3463 vm_page_unhold(pt_m); 3464 } 3465 vm_object_drop(pmap->pm_pteobj); 3466 } 3467
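
/*
 * Illustrative only: a minimal pmap_pgscan() consumer, kept under #if 0
 * so it is not compiled.  The callback is invoked with the page busied;
 * this sketch assumes the callback is responsible for waking the page
 * back up, since pmap_pgscan() above does not do so itself.  Field and
 * callback names follow the usage in pmap_pgscan() above; anything
 * beyond that is an assumption.
 */
#if 0
static int
example_pgscan_callback(struct pmap_pgscan_info *pginfo, vm_offset_t va,
			vm_page_t m)
{
	/* Deactivate each managed page found in the range. */
	vm_page_deactivate(m);
	vm_page_wakeup(m);	/* assumed to be the callback's job */
	return 0;		/* a negative return aborts the scan */
}

static void
example_pgscan(pmap_t pmap, vm_offset_t beg, vm_offset_t end)
{
	struct pmap_pgscan_info info;

	bzero(&info, sizeof(info));
	info.pmap = pmap;
	info.beg_addr = beg;
	info.end_addr = end;
	info.callback = example_pgscan_callback;
	pmap_pgscan(&info);
}
#endif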