1 /* 2 * Copyright (c) 1991 Regents of the University of California. 3 * Copyright (c) 1994 John S. Dyson 4 * Copyright (c) 1994 David Greenman 5 * Copyright (c) 2003 Peter Wemm 6 * Copyright (c) 2005-2008 Alan L. Cox <alc@cs.rice.edu> 7 * Copyright (c) 2008, 2009 The DragonFly Project. 8 * Copyright (c) 2008, 2009 Jordan Gordeev. 9 * All rights reserved. 10 * 11 * This code is derived from software contributed to Berkeley by 12 * the Systems Programming Group of the University of Utah Computer 13 * Science Department and William Jolitz of UUNET Technologies Inc. 14 * 15 * Redistribution and use in source and binary forms, with or without 16 * modification, are permitted provided that the following conditions 17 * are met: 18 * 1. Redistributions of source code must retain the above copyright 19 * notice, this list of conditions and the following disclaimer. 20 * 2. Redistributions in binary form must reproduce the above copyright 21 * notice, this list of conditions and the following disclaimer in the 22 * documentation and/or other materials provided with the distribution. 23 * 3. All advertising materials mentioning features or use of this software 24 * must display the following acknowledgement: 25 * This product includes software developed by the University of 26 * California, Berkeley and its contributors. 27 * 4. Neither the name of the University nor the names of its contributors 28 * may be used to endorse or promote products derived from this software 29 * without specific prior written permission. 30 * 31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 34 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 41 * SUCH DAMAGE. 42 * 43 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 44 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $ 45 */ 46 47 /* 48 * Manages physical address maps. 
49 */ 50 51 #include "opt_msgbuf.h" 52 53 #include <sys/param.h> 54 #include <sys/systm.h> 55 #include <sys/kernel.h> 56 #include <sys/proc.h> 57 #include <sys/msgbuf.h> 58 #include <sys/vmmeter.h> 59 #include <sys/mman.h> 60 #include <sys/vmspace.h> 61 62 #include <vm/vm.h> 63 #include <vm/vm_param.h> 64 #include <sys/sysctl.h> 65 #include <sys/lock.h> 66 #include <vm/vm_kern.h> 67 #include <vm/vm_page.h> 68 #include <vm/vm_map.h> 69 #include <vm/vm_object.h> 70 #include <vm/vm_extern.h> 71 #include <vm/vm_pageout.h> 72 #include <vm/vm_pager.h> 73 #include <vm/vm_zone.h> 74 75 #include <sys/user.h> 76 #include <sys/thread2.h> 77 #include <sys/sysref2.h> 78 #include <sys/spinlock2.h> 79 #include <vm/vm_page2.h> 80 81 #include <machine/cputypes.h> 82 #include <machine/md_var.h> 83 #include <machine/specialreg.h> 84 #include <machine/smp.h> 85 #include <machine/globaldata.h> 86 #include <machine/pmap.h> 87 #include <machine/pmap_inval.h> 88 89 #include <ddb/ddb.h> 90 91 #include <stdio.h> 92 #include <assert.h> 93 #include <stdlib.h> 94 #include <pthread.h> 95 96 #define PMAP_KEEP_PDIRS 97 #ifndef PMAP_SHPGPERPROC 98 #define PMAP_SHPGPERPROC 1000 99 #endif 100 101 #if defined(DIAGNOSTIC) 102 #define PMAP_DIAGNOSTIC 103 #endif 104 105 #define MINPV 2048 106 107 #if !defined(PMAP_DIAGNOSTIC) 108 #define PMAP_INLINE __inline 109 #else 110 #define PMAP_INLINE 111 #endif 112 113 /* 114 * Get PDEs and PTEs for user/kernel address space 115 */ 116 static pd_entry_t *pmap_pde(pmap_t pmap, vm_offset_t va); 117 #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) 118 119 #define pmap_pde_v(pte) ((*(pd_entry_t *)pte & VPTE_V) != 0) 120 #define pmap_pte_w(pte) ((*(pt_entry_t *)pte & VPTE_WIRED) != 0) 121 #define pmap_pte_m(pte) ((*(pt_entry_t *)pte & VPTE_M) != 0) 122 #define pmap_pte_u(pte) ((*(pt_entry_t *)pte & VPTE_A) != 0) 123 #define pmap_pte_v(pte) ((*(pt_entry_t *)pte & VPTE_V) != 0) 124 125 /* 126 * Given a map and a machine independent protection code, 127 * convert to a vax protection code. 128 */ 129 #define pte_prot(m, p) \ 130 (protection_codes[p & (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)]) 131 static int protection_codes[8]; 132 133 struct pmap kernel_pmap; 134 135 static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? 
*/ 136 137 static struct vm_object kptobj; 138 static int nkpt; 139 140 static uint64_t KPDphys; /* phys addr of kernel level 2 */ 141 uint64_t KPDPphys; /* phys addr of kernel level 3 */ 142 uint64_t KPML4phys; /* phys addr of kernel level 4 */ 143 144 extern int vmm_enabled; 145 extern void *vkernel_stack; 146 147 /* 148 * Data for the pv entry allocation mechanism 149 */ 150 static vm_zone_t pvzone; 151 static struct vm_zone pvzone_store; 152 static struct vm_object pvzone_obj; 153 static int pv_entry_count = 0; 154 static int pv_entry_max = 0; 155 static int pv_entry_high_water = 0; 156 static int pmap_pagedaemon_waken = 0; 157 static struct pv_entry *pvinit; 158 159 /* 160 * All those kernel PT submaps that BSD is so fond of 161 */ 162 pt_entry_t *CMAP1 = NULL, *ptmmap; 163 caddr_t CADDR1 = NULL; 164 static pt_entry_t *msgbufmap; 165 166 uint64_t KPTphys; 167 168 static PMAP_INLINE void free_pv_entry (pv_entry_t pv); 169 static pv_entry_t get_pv_entry (void); 170 static void i386_protection_init (void); 171 static __inline void pmap_clearbit (vm_page_t m, int bit); 172 173 static void pmap_remove_all (vm_page_t m); 174 static int pmap_remove_pte (struct pmap *pmap, pt_entry_t *ptq, 175 pt_entry_t oldpte, vm_offset_t sva); 176 static void pmap_remove_page (struct pmap *pmap, vm_offset_t va); 177 static int pmap_remove_entry (struct pmap *pmap, vm_page_t m, 178 vm_offset_t va); 179 static boolean_t pmap_testbit (vm_page_t m, int bit); 180 static void pmap_insert_entry (pmap_t pmap, vm_offset_t va, 181 vm_page_t mpte, vm_page_t m, pv_entry_t); 182 183 static vm_page_t pmap_allocpte (pmap_t pmap, vm_offset_t va); 184 185 static int pmap_release_free_page (pmap_t pmap, vm_page_t p); 186 static vm_page_t _pmap_allocpte (pmap_t pmap, vm_pindex_t ptepindex); 187 static vm_page_t pmap_page_lookup (vm_object_t object, vm_pindex_t pindex); 188 static int pmap_unuse_pt (pmap_t, vm_offset_t, vm_page_t); 189 190 static int 191 pv_entry_compare(pv_entry_t pv1, pv_entry_t pv2) 192 { 193 if (pv1->pv_va < pv2->pv_va) 194 return(-1); 195 if (pv1->pv_va > pv2->pv_va) 196 return(1); 197 return(0); 198 } 199 200 RB_GENERATE2(pv_entry_rb_tree, pv_entry, pv_entry, 201 pv_entry_compare, vm_offset_t, pv_va); 202 203 static __inline vm_pindex_t 204 pmap_pt_pindex(vm_offset_t va) 205 { 206 return va >> PDRSHIFT; 207 } 208 209 static __inline vm_pindex_t 210 pmap_pte_index(vm_offset_t va) 211 { 212 return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1)); 213 } 214 215 static __inline vm_pindex_t 216 pmap_pde_index(vm_offset_t va) 217 { 218 return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1)); 219 } 220 221 static __inline vm_pindex_t 222 pmap_pdpe_index(vm_offset_t va) 223 { 224 return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1)); 225 } 226 227 static __inline vm_pindex_t 228 pmap_pml4e_index(vm_offset_t va) 229 { 230 return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1)); 231 } 232 233 /* Return a pointer to the PML4 slot that corresponds to a VA */ 234 static __inline pml4_entry_t * 235 pmap_pml4e(pmap_t pmap, vm_offset_t va) 236 { 237 return (&pmap->pm_pml4[pmap_pml4e_index(va)]); 238 } 239 240 /* Return a pointer to the PDP slot that corresponds to a VA */ 241 static __inline pdp_entry_t * 242 pmap_pml4e_to_pdpe(pml4_entry_t *pml4e, vm_offset_t va) 243 { 244 pdp_entry_t *pdpe; 245 246 pdpe = (pdp_entry_t *)PHYS_TO_DMAP(*pml4e & VPTE_FRAME); 247 return (&pdpe[pmap_pdpe_index(va)]); 248 } 249 250 /* Return a pointer to the PDP slot that corresponds to a VA */ 251 static __inline pdp_entry_t * 252 
pmap_pdpe(pmap_t pmap, vm_offset_t va) 253 { 254 pml4_entry_t *pml4e; 255 256 pml4e = pmap_pml4e(pmap, va); 257 if ((*pml4e & VPTE_V) == 0) 258 return NULL; 259 return (pmap_pml4e_to_pdpe(pml4e, va)); 260 } 261 262 /* Return a pointer to the PD slot that corresponds to a VA */ 263 static __inline pd_entry_t * 264 pmap_pdpe_to_pde(pdp_entry_t *pdpe, vm_offset_t va) 265 { 266 pd_entry_t *pde; 267 268 pde = (pd_entry_t *)PHYS_TO_DMAP(*pdpe & VPTE_FRAME); 269 return (&pde[pmap_pde_index(va)]); 270 } 271 272 /* Return a pointer to the PD slot that corresponds to a VA */ 273 static __inline pd_entry_t * 274 pmap_pde(pmap_t pmap, vm_offset_t va) 275 { 276 pdp_entry_t *pdpe; 277 278 pdpe = pmap_pdpe(pmap, va); 279 if (pdpe == NULL || (*pdpe & VPTE_V) == 0) 280 return NULL; 281 return (pmap_pdpe_to_pde(pdpe, va)); 282 } 283 284 /* Return a pointer to the PT slot that corresponds to a VA */ 285 static __inline pt_entry_t * 286 pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va) 287 { 288 pt_entry_t *pte; 289 290 pte = (pt_entry_t *)PHYS_TO_DMAP(*pde & VPTE_FRAME); 291 return (&pte[pmap_pte_index(va)]); 292 } 293 294 /* 295 * Hold pt_m for page table scans to prevent it from getting reused out 296 * from under us across blocking conditions in the body of the loop. 297 */ 298 static __inline 299 vm_page_t 300 pmap_hold_pt_page(pd_entry_t *pde, vm_offset_t va) 301 { 302 pt_entry_t pte; 303 vm_page_t pt_m; 304 305 pte = (pt_entry_t)*pde; 306 KKASSERT(pte != 0); 307 pt_m = PHYS_TO_VM_PAGE(pte & VPTE_FRAME); 308 vm_page_hold(pt_m); 309 310 return pt_m; 311 } 312 313 /* Return a pointer to the PT slot that corresponds to a VA */ 314 static __inline pt_entry_t * 315 pmap_pte(pmap_t pmap, vm_offset_t va) 316 { 317 pd_entry_t *pde; 318 319 pde = pmap_pde(pmap, va); 320 if (pde == NULL || (*pde & VPTE_V) == 0) 321 return NULL; 322 if ((*pde & VPTE_PS) != 0) /* compat with i386 pmap_pte() */ 323 return ((pt_entry_t *)pde); 324 return (pmap_pde_to_pte(pde, va)); 325 } 326 327 static PMAP_INLINE pt_entry_t * 328 vtopte(vm_offset_t va) 329 { 330 pt_entry_t *x; 331 x = pmap_pte(&kernel_pmap, va); 332 assert(x != NULL); 333 return x; 334 } 335 336 static __inline pd_entry_t * 337 vtopde(vm_offset_t va) 338 { 339 pd_entry_t *x; 340 x = pmap_pde(&kernel_pmap, va); 341 assert(x != NULL); 342 return x; 343 } 344 345 static uint64_t 346 allocpages(vm_paddr_t *firstaddr, int n) 347 { 348 uint64_t ret; 349 350 ret = *firstaddr; 351 /*bzero((void *)ret, n * PAGE_SIZE); not mapped yet */ 352 *firstaddr += n * PAGE_SIZE; 353 return (ret); 354 } 355 356 static void 357 create_dmap_vmm(vm_paddr_t *firstaddr) 358 { 359 void *stack_addr; 360 int pml4_stack_index; 361 int pdp_stack_index; 362 int pd_stack_index; 363 long i,j; 364 int regs[4]; 365 int amd_feature; 366 367 uint64_t KPDP_DMAP_phys = allocpages(firstaddr, NDMPML4E); 368 uint64_t KPDP_VSTACK_phys = allocpages(firstaddr, 1); 369 uint64_t KPD_VSTACK_phys = allocpages(firstaddr, 1); 370 371 pml4_entry_t *KPML4virt = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); 372 pdp_entry_t *KPDP_DMAP_virt = (pdp_entry_t *)PHYS_TO_DMAP(KPDP_DMAP_phys); 373 pdp_entry_t *KPDP_VSTACK_virt = (pdp_entry_t *)PHYS_TO_DMAP(KPDP_VSTACK_phys); 374 pd_entry_t *KPD_VSTACK_virt = (pd_entry_t *)PHYS_TO_DMAP(KPD_VSTACK_phys); 375 376 bzero(KPDP_DMAP_virt, NDMPML4E * PAGE_SIZE); 377 bzero(KPDP_VSTACK_virt, 1 * PAGE_SIZE); 378 bzero(KPD_VSTACK_virt, 1 * PAGE_SIZE); 379 380 do_cpuid(0x80000001, regs); 381 amd_feature = regs[3]; 382 383 /* Build the mappings for the first 512GB */ 384 if (amd_feature & 
	    AMDID_PAGE1GB) {
		/* In pages of 1 GB, if supported */
		for (i = 0; i < NPDPEPG; i++) {
			KPDP_DMAP_virt[i] = ((uint64_t)i << PDPSHIFT);
			KPDP_DMAP_virt[i] |= VPTE_RW | VPTE_V | VPTE_PS | VPTE_U;
		}
	} else {
		/* In pages of 2 MB, otherwise */
		for (i = 0; i < NPDPEPG; i++) {
			uint64_t KPD_DMAP_phys;
			pd_entry_t *KPD_DMAP_virt;

			KPD_DMAP_phys = allocpages(firstaddr, 1);
			KPD_DMAP_virt =
				(pd_entry_t *)PHYS_TO_DMAP(KPD_DMAP_phys);

			bzero(KPD_DMAP_virt, PAGE_SIZE);

			KPDP_DMAP_virt[i] = KPD_DMAP_phys;
			KPDP_DMAP_virt[i] |= VPTE_RW | VPTE_V | VPTE_U;

			/* For each PD, we have to fill in NPTEPG PT entries */
			for (j = 0; j < NPTEPG; j++) {
				KPD_DMAP_virt[j] = (i << PDPSHIFT) |
						   (j << PDRSHIFT);
				KPD_DMAP_virt[j] |= VPTE_RW | VPTE_V |
						    VPTE_PS | VPTE_U;
			}
		}
	}

	/* DMAP for the first 512G */
	KPML4virt[0] = KPDP_DMAP_phys;
	KPML4virt[0] |= VPTE_RW | VPTE_V | VPTE_U;

	/* create a 2 MB map of the new stack */
	pml4_stack_index = (uint64_t)&stack_addr >> PML4SHIFT;
	KPML4virt[pml4_stack_index] = KPDP_VSTACK_phys;
	KPML4virt[pml4_stack_index] |= VPTE_RW | VPTE_V | VPTE_U;

	pdp_stack_index = ((uint64_t)&stack_addr & PML4MASK) >> PDPSHIFT;
	KPDP_VSTACK_virt[pdp_stack_index] = KPD_VSTACK_phys;
	KPDP_VSTACK_virt[pdp_stack_index] |= VPTE_RW | VPTE_V | VPTE_U;

	pd_stack_index = ((uint64_t)&stack_addr & PDPMASK) >> PDRSHIFT;
	KPD_VSTACK_virt[pd_stack_index] = (uint64_t)vkernel_stack;
	KPD_VSTACK_virt[pd_stack_index] |= VPTE_RW | VPTE_V | VPTE_U | VPTE_PS;
}

static void
create_pagetables(vm_paddr_t *firstaddr, int64_t ptov_offset)
{
	int i;
	pml4_entry_t *KPML4virt;
	pdp_entry_t *KPDPvirt;
	pd_entry_t *KPDvirt;
	pt_entry_t *KPTvirt;
	int kpml4i = pmap_pml4e_index(ptov_offset);
	int kpdpi = pmap_pdpe_index(ptov_offset);
	int kpdi = pmap_pde_index(ptov_offset);

	/*
	 * Calculate NKPT - the number of kernel page tables.  We have to
	 * accommodate preallocation of the vm_page_array, the dump bitmap,
	 * MSGBUF_SIZE, and other stuff.  Be generous.
	 *
	 * Maxmem is in pages.
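	 *
	 * Illustrative example (numbers are assumptions, not from this
	 * file): with 4GB of RAM, Maxmem is roughly 1M pages; assuming
	 * sizeof(struct vm_page) is on the order of 128 bytes, the
	 * numerator is about 1M * 128 * 2 + MSGBUF_SIZE, i.e. ~256MB,
	 * and dividing by NBPDR (2MB of KVA covered per page table)
	 * gives nkpt on the order of 128.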
451 */ 452 nkpt = (Maxmem * (sizeof(struct vm_page) * 2) + MSGBUF_SIZE) / NBPDR; 453 /* 454 * Allocate pages 455 */ 456 KPML4phys = allocpages(firstaddr, 1); 457 KPDPphys = allocpages(firstaddr, NKPML4E); 458 KPDphys = allocpages(firstaddr, NKPDPE); 459 KPTphys = allocpages(firstaddr, nkpt); 460 461 KPML4virt = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); 462 KPDPvirt = (pdp_entry_t *)PHYS_TO_DMAP(KPDPphys); 463 KPDvirt = (pd_entry_t *)PHYS_TO_DMAP(KPDphys); 464 KPTvirt = (pt_entry_t *)PHYS_TO_DMAP(KPTphys); 465 466 bzero(KPML4virt, 1 * PAGE_SIZE); 467 bzero(KPDPvirt, NKPML4E * PAGE_SIZE); 468 bzero(KPDvirt, NKPDPE * PAGE_SIZE); 469 bzero(KPTvirt, nkpt * PAGE_SIZE); 470 471 /* Now map the page tables at their location within PTmap */ 472 for (i = 0; i < nkpt; i++) { 473 KPDvirt[i + kpdi] = KPTphys + (i << PAGE_SHIFT); 474 KPDvirt[i + kpdi] |= VPTE_RW | VPTE_V | VPTE_U; 475 } 476 477 /* And connect up the PD to the PDP */ 478 for (i = 0; i < NKPDPE; i++) { 479 KPDPvirt[i + kpdpi] = KPDphys + (i << PAGE_SHIFT); 480 KPDPvirt[i + kpdpi] |= VPTE_RW | VPTE_V | VPTE_U; 481 } 482 483 /* And recursively map PML4 to itself in order to get PTmap */ 484 KPML4virt[PML4PML4I] = KPML4phys; 485 KPML4virt[PML4PML4I] |= VPTE_RW | VPTE_V | VPTE_U; 486 487 /* Connect the KVA slot up to the PML4 */ 488 KPML4virt[kpml4i] = KPDPphys; 489 KPML4virt[kpml4i] |= VPTE_RW | VPTE_V | VPTE_U; 490 } 491 492 /* 493 * Typically used to initialize a fictitious page by vm/device_pager.c 494 */ 495 void 496 pmap_page_init(struct vm_page *m) 497 { 498 vm_page_init(m); 499 TAILQ_INIT(&m->md.pv_list); 500 } 501 502 /* 503 * Bootstrap the system enough to run with virtual memory. 504 * 505 * On the i386 this is called after mapping has already been enabled 506 * and just syncs the pmap module with what has already been done. 507 * [We can't call it easily with mapping off since the kernel is not 508 * mapped with PA == VA, hence we would have to relocate every address 509 * from the linked base (virtual) address "KERNBASE" to the actual 510 * (physical) address starting relative to 0] 511 */ 512 void 513 pmap_bootstrap(vm_paddr_t *firstaddr, int64_t ptov_offset) 514 { 515 vm_offset_t va; 516 pt_entry_t *pte; 517 518 /* 519 * Create an initial set of page tables to run the kernel in. 520 */ 521 create_pagetables(firstaddr, ptov_offset); 522 523 /* Create the DMAP for the VMM */ 524 if (vmm_enabled) { 525 create_dmap_vmm(firstaddr); 526 } 527 528 virtual_start = KvaStart; 529 virtual_end = KvaEnd; 530 531 /* 532 * Initialize protection array. 533 */ 534 i386_protection_init(); 535 536 /* 537 * The kernel's pmap is statically allocated so we don't have to use 538 * pmap_create, which is unlikely to work correctly at this part of 539 * the boot sequence (XXX and which no longer exists). 540 * 541 * The kernel_pmap's pm_pteobj is used only for locking and not 542 * for mmu pages. 543 */ 544 kernel_pmap.pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); 545 kernel_pmap.pm_count = 1; 546 /* don't allow deactivation */ 547 CPUMASK_ASSALLONES(kernel_pmap.pm_active); 548 kernel_pmap.pm_pteobj = NULL; /* see pmap_init */ 549 RB_INIT(&kernel_pmap.pm_pvroot); 550 spin_init(&kernel_pmap.pm_spin, "pmapbootstrap"); 551 552 /* 553 * Reserve some special page table entries/VA space for temporary 554 * mapping of pages. 555 */ 556 #define SYSMAP(c, p, v, n) \ 557 v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); 558 559 va = virtual_start; 560 pte = pmap_pte(&kernel_pmap, va); 561 /* 562 * CMAP1/CMAP2 are used for zeroing and copying pages. 
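	 *
	 * For reference, SYSMAP(c, p, v, n) as defined above simply carves
	 * n pages out of the reserved KVA/PTE run; e.g.
	 * SYSMAP(caddr_t, CMAP1, CADDR1, 1) expands to roughly:
	 *
	 *	CADDR1 = (caddr_t)va; va += 1 * PAGE_SIZE;
	 *	CMAP1 = pte; pte += 1;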
	 */
	SYSMAP(caddr_t, CMAP1, CADDR1, 1)

#if JGV
	/*
	 * Crashdump maps.
	 */
	SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS);
#endif

	/*
	 * ptvmmap is used for reading arbitrary physical pages via
	 * /dev/mem.
	 */
	SYSMAP(caddr_t, ptmmap, ptvmmap, 1)

	/*
	 * msgbufp is used to map the system message buffer.
	 * XXX msgbufmap is not used.
	 */
	SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
	       atop(round_page(MSGBUF_SIZE)))

	virtual_start = va;

	*CMAP1 = 0;
	/* Not ready to do an invltlb yet for VMM */
	if (!vmm_enabled)
		cpu_invltlb();

}

/*
 * Initialize the pmap module.
 * Called by vm_init, to initialize any structures that the pmap
 * system needs to map virtual memory.
 * pmap_init has been enhanced to support discontiguous physical
 * memory in a fairly consistent way.
 */
void
pmap_init(void)
{
	int i;
	int initial_pvs;

	/*
	 * object for kernel page table pages
	 */
	/* JG I think the number can be arbitrary */
	vm_object_init(&kptobj, 5);
	kernel_pmap.pm_pteobj = &kptobj;

	/*
	 * Allocate memory for random pmap data structures.  Includes the
	 * pv_head_table.
	 */
	for (i = 0; i < vm_page_array_size; i++) {
		vm_page_t m;

		m = &vm_page_array[i];
		TAILQ_INIT(&m->md.pv_list);
		m->md.pv_list_count = 0;
	}

	/*
	 * init the pv free list
	 */
	initial_pvs = vm_page_array_size;
	if (initial_pvs < MINPV)
		initial_pvs = MINPV;
	pvzone = &pvzone_store;
	pvinit = (struct pv_entry *)
		kmem_alloc(&kernel_map,
			   initial_pvs * sizeof (struct pv_entry),
			   VM_SUBSYS_PVENTRY);
	zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit,
		  initial_pvs);

	/*
	 * Now it is safe to enable pv_table recording.
	 */
	pmap_initialized = TRUE;
}

/*
 * Initialize the address space (zone) for the pv_entries.  Set a
 * high water mark so that the system can recover from excessive
 * numbers of pv entries.
 */
void
pmap_init2(void)
{
	int shpgperproc = PMAP_SHPGPERPROC;

	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
	pv_entry_high_water = 9 * (pv_entry_max / 10);
	zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT);
}


/***************************************************
 * Low level helper routines.....
 ***************************************************/

/*
 * The modification bit is not tracked for any pages in this range.  XXX
 * such pages in these maps should always use pmap_k*() functions and not
 * be managed anyhow.
 *
 * XXX User and kernel address spaces are independent for virtual kernels;
 * this function only applies to the kernel pmap.
 */
int
pmap_track_modified(pmap_t pmap, vm_offset_t va)
{
	if (pmap != &kernel_pmap)
		return 1;
	if ((va < clean_sva) || (va >= clean_eva))
		return 1;
	else
		return 0;
}

/*
 * Extract the physical page address associated with the map/VA pair.
 *
 * No requirements.
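 *
 * Illustrative calling sketch (not taken from any specific caller; the
 * handle is currently always returned as NULL by this implementation):
 *
 *	void *handle;
 *	vm_paddr_t pa;
 *
 *	pa = pmap_extract(pmap, va, &handle);
 *	...
 *	pmap_extract_done(handle);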
692 */ 693 vm_paddr_t 694 pmap_extract(pmap_t pmap, vm_offset_t va, void **handlep) 695 { 696 vm_paddr_t rtval; 697 pt_entry_t *pte; 698 pd_entry_t pde, *pdep; 699 700 vm_object_hold(pmap->pm_pteobj); 701 rtval = 0; 702 pdep = pmap_pde(pmap, va); 703 if (pdep != NULL) { 704 pde = *pdep; 705 if (pde) { 706 if ((pde & VPTE_PS) != 0) { 707 /* JGV */ 708 rtval = (pde & PG_PS_FRAME) | (va & PDRMASK); 709 } else { 710 pte = pmap_pde_to_pte(pdep, va); 711 rtval = (*pte & VPTE_FRAME) | (va & PAGE_MASK); 712 } 713 } 714 } 715 if (handlep) 716 *handlep = NULL; /* XXX */ 717 vm_object_drop(pmap->pm_pteobj); 718 719 return rtval; 720 } 721 722 void 723 pmap_extract_done(void *handle) 724 { 725 pmap_t pmap; 726 727 if (handle) { 728 pmap = handle; 729 vm_object_drop(pmap->pm_pteobj); 730 } 731 } 732 733 /* 734 * Similar to extract but checks protections, SMP-friendly short-cut for 735 * vm_fault_page[_quick](). 736 * 737 * WARNING! THE RETURNED PAGE IS ONLY HELD AND NEITHER IT NOR ITS TARGET 738 * DATA IS SUITABLE FOR WRITING. Writing can interfere with 739 * pageouts flushes, msync, etc. The hold_count is not enough 740 * to avoid races against pageouts and other flush code doesn't 741 * care about hold_count. 742 */ 743 vm_page_t 744 pmap_fault_page_quick(pmap_t pmap __unused, vm_offset_t vaddr __unused, 745 vm_prot_t prot __unused, int *busyp __unused) 746 { 747 return(NULL); 748 } 749 750 /* 751 * Routine: pmap_kextract 752 * Function: 753 * Extract the physical page address associated 754 * kernel virtual address. 755 */ 756 vm_paddr_t 757 pmap_kextract(vm_offset_t va) 758 { 759 pd_entry_t pde; 760 vm_paddr_t pa; 761 762 KKASSERT(va >= KvaStart && va < KvaEnd); 763 764 /* 765 * The DMAP region is not included in [KvaStart, KvaEnd) 766 */ 767 #if 0 768 if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { 769 pa = DMAP_TO_PHYS(va); 770 } else { 771 #endif 772 pde = *vtopde(va); 773 if (pde & VPTE_PS) { 774 /* JGV */ 775 pa = (pde & PG_PS_FRAME) | (va & PDRMASK); 776 } else { 777 /* 778 * Beware of a concurrent promotion that changes the 779 * PDE at this point! For example, vtopte() must not 780 * be used to access the PTE because it would use the 781 * new PDE. It is, however, safe to use the old PDE 782 * because the page table page is preserved by the 783 * promotion. 784 */ 785 pa = *pmap_pde_to_pte(&pde, va); 786 pa = (pa & VPTE_FRAME) | (va & PAGE_MASK); 787 } 788 #if 0 789 } 790 #endif 791 return pa; 792 } 793 794 /*************************************************** 795 * Low level mapping routines..... 796 ***************************************************/ 797 798 /* 799 * Enter a mapping into kernel_pmap. Mappings created in this fashion 800 * are not managed. Mappings must be immediately accessible on all cpus. 801 * 802 * Call pmap_inval_pte() to invalidate the virtual pte and clean out the 803 * real pmap and handle related races before storing the new vpte. The 804 * new semantics for kenter require use to do an UNCONDITIONAL invalidation, 805 * because the entry may have previously been cleared without an invalidation. 
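 *
 * Illustrative usage sketch (hypothetical caller, not from this file):
 *
 *	pmap_kenter(va, VM_PAGE_TO_PHYS(m));
 *	... access the page through va ...
 *	pmap_kremove(va);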
806 */ 807 void 808 pmap_kenter(vm_offset_t va, vm_paddr_t pa) 809 { 810 pt_entry_t *ptep; 811 pt_entry_t npte; 812 813 KKASSERT(va >= KvaStart && va < KvaEnd); 814 npte = pa | VPTE_RW | VPTE_V | VPTE_U; 815 ptep = vtopte(va); 816 817 #if 1 818 pmap_inval_pte(ptep, &kernel_pmap, va); 819 #else 820 if (*pte & VPTE_V) 821 pmap_inval_pte(ptep, &kernel_pmap, va); 822 #endif 823 atomic_swap_long(ptep, npte); 824 } 825 826 /* 827 * Enter an unmanaged KVA mapping for the private use of the current 828 * cpu only. 829 * 830 * It is illegal for the mapping to be accessed by other cpus without 831 * proper invalidation. 832 */ 833 int 834 pmap_kenter_quick(vm_offset_t va, vm_paddr_t pa) 835 { 836 pt_entry_t *ptep; 837 pt_entry_t npte; 838 int res; 839 840 KKASSERT(va >= KvaStart && va < KvaEnd); 841 842 npte = (vpte_t)pa | VPTE_RW | VPTE_V | VPTE_U; 843 ptep = vtopte(va); 844 845 #if 1 846 pmap_inval_pte_quick(ptep, &kernel_pmap, va); 847 res = 1; 848 #else 849 /* FUTURE */ 850 res = (*ptep != 0); 851 if (*pte & VPTE_V) 852 pmap_inval_pte(pte, &kernel_pmap, va); 853 #endif 854 atomic_swap_long(ptep, npte); 855 856 return res; 857 } 858 859 /* 860 * Invalidation will occur later, ok to be lazy here. 861 */ 862 int 863 pmap_kenter_noinval(vm_offset_t va, vm_paddr_t pa) 864 { 865 pt_entry_t *ptep; 866 pt_entry_t npte; 867 int res; 868 869 KKASSERT(va >= KvaStart && va < KvaEnd); 870 871 npte = (vpte_t)pa | VPTE_RW | VPTE_V | VPTE_U; 872 ptep = vtopte(va); 873 #if 1 874 res = 1; 875 #else 876 /* FUTURE */ 877 res = (*ptep != 0); 878 #endif 879 atomic_swap_long(ptep, npte); 880 881 return res; 882 } 883 884 /* 885 * Remove an unmanaged mapping created with pmap_kenter*(). 886 */ 887 void 888 pmap_kremove(vm_offset_t va) 889 { 890 pt_entry_t *ptep; 891 892 KKASSERT(va >= KvaStart && va < KvaEnd); 893 894 ptep = vtopte(va); 895 atomic_swap_long(ptep, 0); 896 pmap_inval_pte(ptep, &kernel_pmap, va); 897 } 898 899 /* 900 * Remove an unmanaged mapping created with pmap_kenter*() but synchronize 901 * only with this cpu. 902 * 903 * Unfortunately because we optimize new entries by testing VPTE_V later 904 * on, we actually still have to synchronize with all the cpus. XXX maybe 905 * store a junk value and test against 0 in the other places instead? 906 */ 907 void 908 pmap_kremove_quick(vm_offset_t va) 909 { 910 pt_entry_t *ptep; 911 912 KKASSERT(va >= KvaStart && va < KvaEnd); 913 914 ptep = vtopte(va); 915 atomic_swap_long(ptep, 0); 916 pmap_inval_pte(ptep, &kernel_pmap, va); /* NOT _quick */ 917 } 918 919 /* 920 * Invalidation will occur later, ok to be lazy here. 921 */ 922 void 923 pmap_kremove_noinval(vm_offset_t va) 924 { 925 pt_entry_t *ptep; 926 927 KKASSERT(va >= KvaStart && va < KvaEnd); 928 929 ptep = vtopte(va); 930 atomic_swap_long(ptep, 0); 931 } 932 933 /* 934 * Used to map a range of physical addresses into kernel 935 * virtual address space. 936 * 937 * For now, VM is already on, we only need to map the 938 * specified memory. 939 */ 940 vm_offset_t 941 pmap_map(vm_offset_t *virtp, vm_paddr_t start, vm_paddr_t end, int prot) 942 { 943 return PHYS_TO_DMAP(start); 944 } 945 946 /* 947 * Map a set of unmanaged VM pages into KVM. 
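 *
 * Illustrative usage sketch (hypothetical caller, not from this file);
 * callers typically pair this with pmap_qremove() on the same range:
 *
 *	pmap_qenter(va, marray, npages);
 *	... use the npages pages now mapped at va ...
 *	pmap_qremove(va, npages);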
948 */ 949 void 950 pmap_qenter(vm_offset_t beg_va, vm_page_t *m, int count) 951 { 952 vm_offset_t end_va; 953 vm_offset_t va; 954 955 end_va = beg_va + count * PAGE_SIZE; 956 KKASSERT(beg_va >= KvaStart && end_va <= KvaEnd); 957 958 for (va = beg_va; va < end_va; va += PAGE_SIZE) { 959 pt_entry_t *ptep; 960 961 ptep = vtopte(va); 962 atomic_swap_long(ptep, VM_PAGE_TO_PHYS(*m) | 963 VPTE_RW | VPTE_V | VPTE_U); 964 ++m; 965 } 966 pmap_invalidate_range(&kernel_pmap, beg_va, end_va); 967 /* pmap_inval_pte(pte, &kernel_pmap, va); */ 968 } 969 970 /* 971 * Undo the effects of pmap_qenter*(). 972 */ 973 void 974 pmap_qremove(vm_offset_t beg_va, int count) 975 { 976 vm_offset_t end_va; 977 vm_offset_t va; 978 979 end_va = beg_va + count * PAGE_SIZE; 980 KKASSERT(beg_va >= KvaStart && end_va < KvaEnd); 981 982 for (va = beg_va; va < end_va; va += PAGE_SIZE) { 983 pt_entry_t *ptep; 984 985 ptep = vtopte(va); 986 atomic_swap_long(ptep, 0); 987 } 988 pmap_invalidate_range(&kernel_pmap, beg_va, end_va); 989 } 990 991 /* 992 * Unlike the real pmap code, we can't avoid calling the real-kernel. 993 */ 994 void 995 pmap_qremove_quick(vm_offset_t va, int count) 996 { 997 pmap_qremove(va, count); 998 } 999 1000 void 1001 pmap_qremove_noinval(vm_offset_t va, int count) 1002 { 1003 pmap_qremove(va, count); 1004 } 1005 1006 /* 1007 * This routine works like vm_page_lookup() but also blocks as long as the 1008 * page is busy. This routine does not busy the page it returns. 1009 * 1010 * Unless the caller is managing objects whos pages are in a known state, 1011 * the call should be made with a critical section held so the page's object 1012 * association remains valid on return. 1013 */ 1014 static vm_page_t 1015 pmap_page_lookup(vm_object_t object, vm_pindex_t pindex) 1016 { 1017 vm_page_t m; 1018 1019 ASSERT_LWKT_TOKEN_HELD(vm_object_token(object)); 1020 m = vm_page_lookup_busy_wait(object, pindex, TRUE, "pplookp"); 1021 1022 return(m); 1023 } 1024 1025 /* 1026 * Create a new thread and optionally associate it with a (new) process. 1027 * NOTE! the new thread's cpu may not equal the current cpu. 1028 */ 1029 void 1030 pmap_init_thread(thread_t td) 1031 { 1032 /* enforce pcb placement */ 1033 td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_size) - 1; 1034 td->td_savefpu = &td->td_pcb->pcb_save; 1035 td->td_sp = (char *)td->td_pcb - 16; /* JG is -16 needed on x86_64? */ 1036 } 1037 1038 /* 1039 * This routine directly affects the fork perf for a process. 1040 */ 1041 void 1042 pmap_init_proc(struct proc *p) 1043 { 1044 } 1045 1046 /* 1047 * Unwire a page table which has been removed from the pmap. We own the 1048 * wire_count, so the page cannot go away. The page representing the page 1049 * table is passed in unbusied and must be busied if we cannot trivially 1050 * unwire it. 1051 * 1052 * XXX NOTE! This code is not usually run because we do not currently 1053 * implement dynamic page table page removal. The page in 1054 * its parent assumes at least 1 wire count, so no call to this 1055 * function ever sees a wire count less than 2. 1056 */ 1057 static int 1058 pmap_unwire_pgtable(pmap_t pmap, vm_offset_t va, vm_page_t m) 1059 { 1060 /* 1061 * Try to unwire optimally. If non-zero is returned the wire_count 1062 * is 1 and we must busy the page to unwire it. 
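	 *
	 * Page table pages form a wire_count hierarchy: dropping the last
	 * wire on a PT page also drops a wire on its parent PD page, and
	 * likewise for PD -> PDP, as handled recursively further below.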
1063 */ 1064 if (vm_page_unwire_quick(m) == 0) 1065 return 0; 1066 1067 vm_page_busy_wait(m, TRUE, "pmuwpt"); 1068 KASSERT(m->queue == PQ_NONE, 1069 ("_pmap_unwire_pgtable: %p->queue != PQ_NONE", m)); 1070 1071 if (m->wire_count == 1) { 1072 /* 1073 * Unmap the page table page. 1074 */ 1075 /* pmap_inval_add(info, pmap, -1); */ 1076 1077 if (m->pindex >= (NUPT_TOTAL + NUPD_TOTAL)) { 1078 /* PDP page */ 1079 pml4_entry_t *pml4; 1080 pml4 = pmap_pml4e(pmap, va); 1081 *pml4 = 0; 1082 } else if (m->pindex >= NUPT_TOTAL) { 1083 /* PD page */ 1084 pdp_entry_t *pdp; 1085 pdp = pmap_pdpe(pmap, va); 1086 *pdp = 0; 1087 } else { 1088 /* PT page */ 1089 pd_entry_t *pd; 1090 pd = pmap_pde(pmap, va); 1091 *pd = 0; 1092 } 1093 1094 KKASSERT(pmap->pm_stats.resident_count > 0); 1095 atomic_add_long(&pmap->pm_stats.resident_count, -1); 1096 1097 if (pmap->pm_ptphint == m) 1098 pmap->pm_ptphint = NULL; 1099 1100 if (m->pindex < NUPT_TOTAL) { 1101 /* We just released a PT, unhold the matching PD */ 1102 vm_page_t pdpg; 1103 1104 pdpg = PHYS_TO_VM_PAGE(*pmap_pdpe(pmap, va) & 1105 VPTE_FRAME); 1106 pmap_unwire_pgtable(pmap, va, pdpg); 1107 } 1108 if (m->pindex >= NUPT_TOTAL && 1109 m->pindex < (NUPT_TOTAL + NUPD_TOTAL)) { 1110 /* We just released a PD, unhold the matching PDP */ 1111 vm_page_t pdppg; 1112 1113 pdppg = PHYS_TO_VM_PAGE(*pmap_pml4e(pmap, va) & 1114 VPTE_FRAME); 1115 pmap_unwire_pgtable(pmap, va, pdppg); 1116 } 1117 1118 /* 1119 * This was our last wire, the page had better be unwired 1120 * after we decrement wire_count. 1121 * 1122 * FUTURE NOTE: shared page directory page could result in 1123 * multiple wire counts. 1124 */ 1125 vm_page_unwire(m, 0); 1126 KKASSERT(m->wire_count == 0); 1127 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 1128 vm_page_flash(m); 1129 vm_page_free(m); 1130 return 1; 1131 } else { 1132 /* XXX SMP race to 1 if not holding vmobj */ 1133 vm_page_unwire(m, 0); 1134 vm_page_wakeup(m); 1135 return 0; 1136 } 1137 } 1138 1139 /* 1140 * After removing a page table entry, this routine is used to 1141 * conditionally free the page, and manage the hold/wire counts. 1142 * 1143 * If not NULL the caller owns a wire_count on mpte, so it can't disappear. 1144 * If NULL the caller owns a wire_count on what would be the mpte, we must 1145 * look it up. 1146 */ 1147 static int 1148 pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) 1149 { 1150 vm_pindex_t ptepindex; 1151 1152 ASSERT_LWKT_TOKEN_HELD(vm_object_token(pmap->pm_pteobj)); 1153 1154 if (mpte == NULL) { 1155 /* 1156 * page table pages in the kernel_pmap are not managed. 1157 */ 1158 if (pmap == &kernel_pmap) 1159 return(0); 1160 ptepindex = pmap_pt_pindex(va); 1161 if (pmap->pm_ptphint && 1162 (pmap->pm_ptphint->pindex == ptepindex)) { 1163 mpte = pmap->pm_ptphint; 1164 } else { 1165 mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex); 1166 pmap->pm_ptphint = mpte; 1167 vm_page_wakeup(mpte); 1168 } 1169 } 1170 return pmap_unwire_pgtable(pmap, va, mpte); 1171 } 1172 1173 /* 1174 * Initialize pmap0/vmspace0 . Since process 0 never enters user mode we 1175 * just dummy it up so it works well enough for fork(). 1176 * 1177 * In DragonFly, process pmaps may only be used to manipulate user address 1178 * space, never kernel address space. 1179 */ 1180 void 1181 pmap_pinit0(struct pmap *pmap) 1182 { 1183 pmap_pinit(pmap); 1184 } 1185 1186 /* 1187 * Initialize a preallocated and zeroed pmap structure, 1188 * such as one in a vmspace structure. 
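 *
 * The pm_pteobj backing object is indexed by page table level:
 * pindex [0, NUPT_TOTAL) holds PT pages,
 * [NUPT_TOTAL, NUPT_TOTAL + NUPD_TOTAL) holds PD pages,
 * [NUPT_TOTAL + NUPD_TOTAL, NUPT_TOTAL + NUPD_TOTAL + NUPDP_TOTAL)
 * holds PDP pages, and the single page at index
 * NUPT_TOTAL + NUPD_TOTAL + NUPDP_TOTAL is the PML4 page itself.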
1189 */ 1190 void 1191 pmap_pinit(struct pmap *pmap) 1192 { 1193 vm_page_t ptdpg; 1194 1195 /* 1196 * No need to allocate page table space yet but we do need a valid 1197 * page directory table. 1198 */ 1199 if (pmap->pm_pml4 == NULL) { 1200 pmap->pm_pml4 = (pml4_entry_t *) 1201 kmem_alloc_pageable(&kernel_map, PAGE_SIZE, 1202 VM_SUBSYS_PML4); 1203 } 1204 1205 /* 1206 * Allocate an object for the ptes 1207 */ 1208 if (pmap->pm_pteobj == NULL) 1209 pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, NUPT_TOTAL + NUPD_TOTAL + NUPDP_TOTAL + 1); 1210 1211 /* 1212 * Allocate the page directory page, unless we already have 1213 * one cached. If we used the cached page the wire_count will 1214 * already be set appropriately. 1215 */ 1216 if ((ptdpg = pmap->pm_pdirm) == NULL) { 1217 ptdpg = vm_page_grab(pmap->pm_pteobj, 1218 NUPT_TOTAL + NUPD_TOTAL + NUPDP_TOTAL, 1219 VM_ALLOC_NORMAL | VM_ALLOC_RETRY | 1220 VM_ALLOC_ZERO); 1221 pmap->pm_pdirm = ptdpg; 1222 vm_page_flag_clear(ptdpg, PG_MAPPED | PG_WRITEABLE); 1223 vm_page_wire(ptdpg); 1224 vm_page_wakeup(ptdpg); 1225 pmap_kenter((vm_offset_t)pmap->pm_pml4, VM_PAGE_TO_PHYS(ptdpg)); 1226 } 1227 pmap->pm_count = 1; 1228 CPUMASK_ASSZERO(pmap->pm_active); 1229 pmap->pm_ptphint = NULL; 1230 RB_INIT(&pmap->pm_pvroot); 1231 spin_init(&pmap->pm_spin, "pmapinit"); 1232 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1233 pmap->pm_stats.resident_count = 1; 1234 pmap->pm_stats.wired_count = 1; 1235 } 1236 1237 /* 1238 * Clean up a pmap structure so it can be physically freed. This routine 1239 * is called by the vmspace dtor function. A great deal of pmap data is 1240 * left passively mapped to improve vmspace management so we have a bit 1241 * of cleanup work to do here. 1242 * 1243 * No requirements. 1244 */ 1245 void 1246 pmap_puninit(pmap_t pmap) 1247 { 1248 vm_page_t p; 1249 1250 KKASSERT(CPUMASK_TESTZERO(pmap->pm_active)); 1251 if ((p = pmap->pm_pdirm) != NULL) { 1252 KKASSERT(pmap->pm_pml4 != NULL); 1253 pmap_kremove((vm_offset_t)pmap->pm_pml4); 1254 vm_page_busy_wait(p, TRUE, "pgpun"); 1255 vm_page_unwire(p, 0); 1256 vm_page_flag_clear(p, PG_MAPPED | PG_WRITEABLE); 1257 vm_page_free(p); 1258 pmap->pm_pdirm = NULL; 1259 atomic_add_long(&pmap->pm_stats.wired_count, -1); 1260 KKASSERT(pmap->pm_stats.wired_count == 0); 1261 } 1262 if (pmap->pm_pml4) { 1263 kmem_free(&kernel_map, (vm_offset_t)pmap->pm_pml4, PAGE_SIZE); 1264 pmap->pm_pml4 = NULL; 1265 } 1266 if (pmap->pm_pteobj) { 1267 vm_object_deallocate(pmap->pm_pteobj); 1268 pmap->pm_pteobj = NULL; 1269 } 1270 } 1271 1272 /* 1273 * This function is now unused (used to add the pmap to the pmap_list) 1274 */ 1275 void 1276 pmap_pinit2(struct pmap *pmap) 1277 { 1278 } 1279 1280 /* 1281 * Attempt to release and free a vm_page in a pmap. Returns 1 on success, 1282 * 0 on failure (if the procedure had to sleep). 1283 * 1284 * When asked to remove the page directory page itself, we actually just 1285 * leave it cached so we do not have to incur the SMP inval overhead of 1286 * removing the kernel mapping. pmap_puninit() will take care of it. 1287 */ 1288 static int 1289 pmap_release_free_page(struct pmap *pmap, vm_page_t p) 1290 { 1291 /* 1292 * This code optimizes the case of freeing non-busy 1293 * page-table pages. Those pages are zero now, and 1294 * might as well be placed directly into the zero queue. 1295 */ 1296 if (vm_page_busy_try(p, TRUE)) { 1297 vm_page_sleep_busy(p, TRUE, "pmaprl"); 1298 return 1; 1299 } 1300 1301 /* 1302 * Remove the page table page from the processes address space. 
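	 *
	 * For the PDP, PD, and PT cases below, this clears the entry in
	 * the parent level (PML4, PDP, or PD) that points at this page
	 * and drops one wire count on that parent page via
	 * vm_page_unwire_quick().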
1303 */ 1304 if (p->pindex == NUPT_TOTAL + NUPD_TOTAL + NUPDP_TOTAL) { 1305 /* 1306 * We are the pml4 table itself. 1307 */ 1308 /* XXX anything to do here? */ 1309 } else if (p->pindex >= (NUPT_TOTAL + NUPD_TOTAL)) { 1310 /* 1311 * We are a PDP page. 1312 * We look for the PML4 entry that points to us. 1313 */ 1314 vm_page_t m4; 1315 pml4_entry_t *pml4; 1316 int idx; 1317 1318 m4 = vm_page_lookup(pmap->pm_pteobj, 1319 NUPT_TOTAL + NUPD_TOTAL + NUPDP_TOTAL); 1320 KKASSERT(m4 != NULL); 1321 pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m4)); 1322 idx = (p->pindex - (NUPT_TOTAL + NUPD_TOTAL)) % NPML4EPG; 1323 KKASSERT(pml4[idx] != 0); 1324 if (pml4[idx] == 0) 1325 kprintf("pmap_release: Unmapped PML4\n"); 1326 pml4[idx] = 0; 1327 vm_page_unwire_quick(m4); 1328 } else if (p->pindex >= NUPT_TOTAL) { 1329 /* 1330 * We are a PD page. 1331 * We look for the PDP entry that points to us. 1332 */ 1333 vm_page_t m3; 1334 pdp_entry_t *pdp; 1335 int idx; 1336 1337 m3 = vm_page_lookup(pmap->pm_pteobj, 1338 NUPT_TOTAL + NUPD_TOTAL + 1339 (p->pindex - NUPT_TOTAL) / NPDPEPG); 1340 KKASSERT(m3 != NULL); 1341 pdp = (pdp_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m3)); 1342 idx = (p->pindex - NUPT_TOTAL) % NPDPEPG; 1343 KKASSERT(pdp[idx] != 0); 1344 if (pdp[idx] == 0) 1345 kprintf("pmap_release: Unmapped PDP %d\n", idx); 1346 pdp[idx] = 0; 1347 vm_page_unwire_quick(m3); 1348 } else { 1349 /* We are a PT page. 1350 * We look for the PD entry that points to us. 1351 */ 1352 vm_page_t m2; 1353 pd_entry_t *pd; 1354 int idx; 1355 1356 m2 = vm_page_lookup(pmap->pm_pteobj, 1357 NUPT_TOTAL + p->pindex / NPDEPG); 1358 KKASSERT(m2 != NULL); 1359 pd = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m2)); 1360 idx = p->pindex % NPDEPG; 1361 if (pd[idx] == 0) 1362 kprintf("pmap_release: Unmapped PD %d\n", idx); 1363 pd[idx] = 0; 1364 vm_page_unwire_quick(m2); 1365 } 1366 KKASSERT(pmap->pm_stats.resident_count > 0); 1367 atomic_add_long(&pmap->pm_stats.resident_count, -1); 1368 1369 if (p->wire_count > 1) { 1370 panic("pmap_release: freeing held pt page " 1371 "pmap=%p pg=%p dmap=%p pi=%ld {%ld,%ld,%ld}", 1372 pmap, p, (void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(p)), 1373 p->pindex, NUPT_TOTAL, NUPD_TOTAL, NUPDP_TOTAL); 1374 } 1375 1376 if (pmap->pm_ptphint == p) 1377 pmap->pm_ptphint = NULL; 1378 1379 /* 1380 * We leave the top-level page table page cached, wired, and mapped in 1381 * the pmap until the dtor function (pmap_puninit()) gets called. 1382 * However, still clean it up. 1383 */ 1384 if (p->pindex == NUPT_TOTAL + NUPD_TOTAL + NUPDP_TOTAL) { 1385 bzero(pmap->pm_pml4, PAGE_SIZE); 1386 vm_page_wakeup(p); 1387 } else { 1388 vm_page_unwire(p, 0); 1389 vm_page_flag_clear(p, PG_MAPPED | PG_WRITEABLE); 1390 vm_page_free(p); 1391 atomic_add_long(&pmap->pm_stats.wired_count, -1); 1392 } 1393 return 0; 1394 } 1395 1396 /* 1397 * Locate the requested PT, PD, or PDP page table page. 1398 * 1399 * Returns a busied page, caller must vm_page_wakeup() when done. 1400 */ 1401 static vm_page_t 1402 _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex) 1403 { 1404 vm_page_t m; 1405 vm_page_t pm; 1406 vm_pindex_t pindex; 1407 pt_entry_t *ptep; 1408 pt_entry_t data; 1409 1410 /* 1411 * Find or fabricate a new pagetable page. A non-zero wire_count 1412 * indicates that the page has already been mapped into its parent. 
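	 *
	 * When the page is newly allocated, the code below recursively
	 * calls _pmap_allocpte() on the parent level (a PD for a PT, a
	 * PDP for a PD) so the chain from the PML4 down exists before
	 * the new entry is installed.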
1413 */ 1414 m = vm_page_grab(pmap->pm_pteobj, ptepindex, 1415 VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY); 1416 if (m->wire_count != 0) 1417 return m; 1418 1419 /* 1420 * Map the page table page into its parent, giving it 1 wire count. 1421 */ 1422 vm_page_wire(m); 1423 vm_page_unmanage(m); 1424 atomic_add_long(&pmap->pm_stats.resident_count, 1); 1425 vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); 1426 1427 data = VM_PAGE_TO_PHYS(m) | 1428 VPTE_RW | VPTE_V | VPTE_U | VPTE_A | VPTE_M | VPTE_WIRED; 1429 atomic_add_long(&pmap->pm_stats.wired_count, 1); 1430 1431 if (ptepindex >= (NUPT_TOTAL + NUPD_TOTAL)) { 1432 /* 1433 * Map PDP into the PML4 1434 */ 1435 pindex = ptepindex - (NUPT_TOTAL + NUPD_TOTAL); 1436 pindex &= (NUPDP_TOTAL - 1); 1437 ptep = (pt_entry_t *)pmap->pm_pml4; 1438 pm = NULL; 1439 } else if (ptepindex >= NUPT_TOTAL) { 1440 /* 1441 * Map PD into its PDP 1442 */ 1443 pindex = (ptepindex - NUPT_TOTAL) >> NPDPEPGSHIFT; 1444 pindex += NUPT_TOTAL + NUPD_TOTAL; 1445 pm = _pmap_allocpte(pmap, pindex); 1446 pindex = (ptepindex - NUPT_TOTAL) & (NPDPEPG - 1); 1447 ptep = (void *)PHYS_TO_DMAP(pm->phys_addr); 1448 } else { 1449 /* 1450 * Map PT into its PD 1451 */ 1452 pindex = ptepindex >> NPDPEPGSHIFT; 1453 pindex += NUPT_TOTAL; 1454 pm = _pmap_allocpte(pmap, pindex); 1455 pindex = ptepindex & (NPTEPG - 1); 1456 ptep = (void *)PHYS_TO_DMAP(pm->phys_addr); 1457 } 1458 1459 /* 1460 * Install the pte in (pm). (m) prevents races. 1461 */ 1462 ptep += pindex; 1463 data = atomic_swap_long(ptep, data); 1464 if (pm) { 1465 vm_page_wire_quick(pm); 1466 vm_page_wakeup(pm); 1467 } 1468 pmap->pm_ptphint = pm; 1469 1470 return m; 1471 } 1472 1473 /* 1474 * Determine the page table page required to access the VA in the pmap 1475 * and allocate it if necessary. Return a held vm_page_t for the page. 1476 * 1477 * Only used with user pmaps. 1478 */ 1479 static vm_page_t 1480 pmap_allocpte(pmap_t pmap, vm_offset_t va) 1481 { 1482 vm_pindex_t ptepindex; 1483 vm_page_t m; 1484 1485 ASSERT_LWKT_TOKEN_HELD(vm_object_token(pmap->pm_pteobj)); 1486 1487 /* 1488 * Calculate pagetable page index, and return the PT page to 1489 * the caller. 1490 */ 1491 ptepindex = pmap_pt_pindex(va); 1492 m = _pmap_allocpte(pmap, ptepindex); 1493 1494 return m; 1495 } 1496 1497 /*************************************************** 1498 * Pmap allocation/deallocation routines. 1499 ***************************************************/ 1500 1501 /* 1502 * Release any resources held by the given physical map. 1503 * Called when a pmap initialized by pmap_pinit is being released. 1504 * Should only be called if the map contains no valid mappings. 1505 */ 1506 static int pmap_release_callback(struct vm_page *p, void *data); 1507 1508 void 1509 pmap_release(struct pmap *pmap) 1510 { 1511 vm_object_t object = pmap->pm_pteobj; 1512 struct rb_vm_page_scan_info info; 1513 1514 KKASSERT(pmap != &kernel_pmap); 1515 1516 #if defined(DIAGNOSTIC) 1517 if (object->ref_count != 1) 1518 panic("pmap_release: pteobj reference count != 1"); 1519 #endif 1520 1521 info.pmap = pmap; 1522 info.object = object; 1523 1524 KASSERT(CPUMASK_TESTZERO(pmap->pm_active), 1525 ("pmap %p still active! 
%016jx", 1526 pmap, 1527 (uintmax_t)CPUMASK_LOWMASK(pmap->pm_active))); 1528 1529 vm_object_hold(object); 1530 do { 1531 info.error = 0; 1532 info.mpte = NULL; 1533 info.limit = object->generation; 1534 1535 vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL, 1536 pmap_release_callback, &info); 1537 if (info.error == 0 && info.mpte) { 1538 if (pmap_release_free_page(pmap, info.mpte)) 1539 info.error = 1; 1540 } 1541 } while (info.error); 1542 1543 pmap->pm_ptphint = NULL; 1544 1545 KASSERT((pmap->pm_stats.wired_count == (pmap->pm_pdirm != NULL)), 1546 ("pmap_release: dangling count %p %ld", 1547 pmap, pmap->pm_stats.wired_count)); 1548 1549 vm_object_drop(object); 1550 } 1551 1552 static int 1553 pmap_release_callback(struct vm_page *p, void *data) 1554 { 1555 struct rb_vm_page_scan_info *info = data; 1556 1557 if (p->pindex == NUPT_TOTAL + NUPD_TOTAL + NUPDP_TOTAL) { 1558 info->mpte = p; 1559 return(0); 1560 } 1561 if (pmap_release_free_page(info->pmap, p)) { 1562 info->error = 1; 1563 return(-1); 1564 } 1565 if (info->object->generation != info->limit) { 1566 info->error = 1; 1567 return(-1); 1568 } 1569 return(0); 1570 } 1571 1572 /* 1573 * Grow the number of kernel page table entries, if needed. 1574 * 1575 * kernel_map must be locked exclusively by the caller. 1576 */ 1577 void 1578 pmap_growkernel(vm_offset_t kstart, vm_offset_t kend) 1579 { 1580 vm_offset_t addr; 1581 vm_paddr_t paddr; 1582 vm_offset_t ptppaddr; 1583 vm_page_t nkpg; 1584 pd_entry_t *pde, newpdir; 1585 pdp_entry_t newpdp; 1586 1587 addr = kend; 1588 1589 vm_object_hold(&kptobj); 1590 if (kernel_vm_end == 0) { 1591 kernel_vm_end = KvaStart; 1592 nkpt = 0; 1593 while ((*pmap_pde(&kernel_pmap, kernel_vm_end) & VPTE_V) != 0) { 1594 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1595 nkpt++; 1596 if (kernel_vm_end - 1 >= kernel_map.max_offset) { 1597 kernel_vm_end = kernel_map.max_offset; 1598 break; 1599 } 1600 } 1601 } 1602 addr = roundup2(addr, PAGE_SIZE * NPTEPG); 1603 if (addr - 1 >= kernel_map.max_offset) 1604 addr = kernel_map.max_offset; 1605 while (kernel_vm_end < addr) { 1606 pde = pmap_pde(&kernel_pmap, kernel_vm_end); 1607 if (pde == NULL) { 1608 /* We need a new PDP entry */ 1609 nkpg = vm_page_alloc(&kptobj, nkpt, 1610 VM_ALLOC_NORMAL | 1611 VM_ALLOC_SYSTEM | 1612 VM_ALLOC_INTERRUPT); 1613 if (nkpg == NULL) { 1614 panic("pmap_growkernel: no memory to " 1615 "grow kernel"); 1616 } 1617 paddr = VM_PAGE_TO_PHYS(nkpg); 1618 pmap_zero_page(paddr); 1619 newpdp = (pdp_entry_t)(paddr | 1620 VPTE_V | VPTE_RW | VPTE_U | 1621 VPTE_A | VPTE_M | VPTE_WIRED); 1622 *pmap_pdpe(&kernel_pmap, kernel_vm_end) = newpdp; 1623 atomic_add_long(&kernel_pmap.pm_stats.wired_count, 1); 1624 nkpt++; 1625 continue; /* try again */ 1626 } 1627 if ((*pde & VPTE_V) != 0) { 1628 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & 1629 ~(PAGE_SIZE * NPTEPG - 1); 1630 if (kernel_vm_end - 1 >= kernel_map.max_offset) { 1631 kernel_vm_end = kernel_map.max_offset; 1632 break; 1633 } 1634 continue; 1635 } 1636 1637 /* 1638 * This index is bogus, but out of the way 1639 */ 1640 nkpg = vm_page_alloc(&kptobj, nkpt, 1641 VM_ALLOC_NORMAL | 1642 VM_ALLOC_SYSTEM | 1643 VM_ALLOC_INTERRUPT); 1644 if (nkpg == NULL) 1645 panic("pmap_growkernel: no memory to grow kernel"); 1646 1647 vm_page_wire(nkpg); 1648 ptppaddr = VM_PAGE_TO_PHYS(nkpg); 1649 pmap_zero_page(ptppaddr); 1650 newpdir = (pd_entry_t)(ptppaddr | 1651 VPTE_V | VPTE_RW | VPTE_U | 1652 VPTE_A | VPTE_M | VPTE_WIRED); 1653 *pmap_pde(&kernel_pmap, kernel_vm_end) = 
	    newpdir;
		atomic_add_long(&kernel_pmap.pm_stats.wired_count, 1);
		nkpt++;

		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
				~(PAGE_SIZE * NPTEPG - 1);
		if (kernel_vm_end - 1 >= kernel_map.max_offset) {
			kernel_vm_end = kernel_map.max_offset;
			break;
		}
	}
	vm_object_drop(&kptobj);
}

/*
 * Add a reference to the specified pmap.
 *
 * No requirements.
 */
void
pmap_reference(pmap_t pmap)
{
	if (pmap)
		atomic_add_int(&pmap->pm_count, 1);
}

/************************************************************************
 *			VMSPACE MANAGEMENT				*
 ************************************************************************
 *
 * The VMSPACE management we do in our virtual kernel must be reflected
 * in the real kernel.  This is accomplished by making vmspace system
 * calls to the real kernel.
 */
void
cpu_vmspace_alloc(struct vmspace *vm)
{
	int r;
	void *rp;
	vpte_t vpte;

	/*
	 * If VMM is enabled, do nothing: we are able to use real
	 * page tables.
	 */
	if (vmm_enabled)
		return;

#define USER_SIZE	(VM_MAX_USER_ADDRESS - VM_MIN_USER_ADDRESS)

	if (vmspace_create(&vm->vm_pmap, 0, NULL) < 0)
		panic("vmspace_create() failed");

	rp = vmspace_mmap(&vm->vm_pmap, VM_MIN_USER_ADDRESS, USER_SIZE,
			  PROT_READ|PROT_WRITE,
			  MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED,
			  MemImageFd, 0);
	if (rp == MAP_FAILED)
		panic("vmspace_mmap: failed");
	vmspace_mcontrol(&vm->vm_pmap, VM_MIN_USER_ADDRESS, USER_SIZE,
			 MADV_NOSYNC, 0);
	vpte = VM_PAGE_TO_PHYS(vmspace_pmap(vm)->pm_pdirm) |
	       VPTE_RW | VPTE_V | VPTE_U;
	r = vmspace_mcontrol(&vm->vm_pmap, VM_MIN_USER_ADDRESS, USER_SIZE,
			     MADV_SETMAP, vpte);
	if (r < 0)
		panic("vmspace_mcontrol: failed");
}

void
cpu_vmspace_free(struct vmspace *vm)
{
	/*
	 * If VMM is enabled, do nothing: we are able to use real
	 * page tables.
	 */
	if (vmm_enabled)
		return;

	if (vmspace_destroy(&vm->vm_pmap) < 0)
		panic("vmspace_destroy() failed");
}

/***************************************************
 * page management routines.
 ***************************************************/

/*
 * free the pv_entry back to the free list.  This function may be
 * called from an interrupt.
 */
static __inline void
free_pv_entry(pv_entry_t pv)
{
	atomic_add_int(&pv_entry_count, -1);
	KKASSERT(pv_entry_count >= 0);
	zfree(pvzone, pv);
}

/*
 * get a new pv_entry, allocating a block from the system
 * when needed.  This function may be called from an interrupt.
 */
static pv_entry_t
get_pv_entry(void)
{
	atomic_add_int(&pv_entry_count, 1);
	if (pv_entry_high_water &&
	    (pv_entry_count > pv_entry_high_water) &&
	    atomic_swap_int(&pmap_pagedaemon_waken, 1) == 0) {
		wakeup(&vm_pages_needed);
	}
	return zalloc(pvzone);
}

/*
 * This routine is very drastic, but can save the system
 * in a pinch.
 *
 * No requirements.
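 *
 * Only runs after get_pv_entry() has observed pv_entry_count above
 * pv_entry_high_water and woken the pagedaemon; the
 * pmap_pagedaemon_waken flag checked below gates the scan.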
1773 */ 1774 void 1775 pmap_collect(void) 1776 { 1777 int i; 1778 vm_page_t m; 1779 static int warningdone=0; 1780 1781 if (pmap_pagedaemon_waken == 0) 1782 return; 1783 pmap_pagedaemon_waken = 0; 1784 1785 if (warningdone < 5) { 1786 kprintf("pmap_collect: collecting pv entries -- " 1787 "suggest increasing PMAP_SHPGPERPROC\n"); 1788 warningdone++; 1789 } 1790 1791 for (i = 0; i < vm_page_array_size; i++) { 1792 m = &vm_page_array[i]; 1793 if (m->wire_count || m->hold_count) 1794 continue; 1795 if (vm_page_busy_try(m, TRUE) == 0) { 1796 if (m->wire_count == 0 && m->hold_count == 0) { 1797 pmap_remove_all(m); 1798 } 1799 vm_page_wakeup(m); 1800 } 1801 } 1802 } 1803 1804 1805 /* 1806 * If it is the first entry on the list, it is actually 1807 * in the header and we must copy the following entry up 1808 * to the header. Otherwise we must search the list for 1809 * the entry. In either case we free the now unused entry. 1810 * 1811 * pmap->pm_pteobj must be held and (m) must be spin-locked by the caller. 1812 */ 1813 static int 1814 pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va) 1815 { 1816 pv_entry_t pv; 1817 int rtval; 1818 1819 vm_page_spin_lock(m); 1820 pv = pv_entry_rb_tree_RB_LOOKUP(&pmap->pm_pvroot, va); 1821 1822 /* 1823 * Note that pv_ptem is NULL if the page table page itself is not 1824 * managed, even if the page being removed IS managed. 1825 */ 1826 rtval = 0; 1827 if (pv) { 1828 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1829 if (TAILQ_EMPTY(&m->md.pv_list)) 1830 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 1831 m->md.pv_list_count--; 1832 KKASSERT(m->md.pv_list_count >= 0); 1833 pv_entry_rb_tree_RB_REMOVE(&pmap->pm_pvroot, pv); 1834 atomic_add_int(&pmap->pm_generation, 1); 1835 vm_page_spin_unlock(m); 1836 rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); 1837 free_pv_entry(pv); 1838 } else { 1839 vm_page_spin_unlock(m); 1840 kprintf("pmap_remove_entry: could not find " 1841 "pmap=%p m=%p va=%016jx\n", 1842 pmap, m, va); 1843 } 1844 return rtval; 1845 } 1846 1847 /* 1848 * Create a pv entry for page at pa for (pmap, va). If the page table page 1849 * holding the VA is managed, mpte will be non-NULL. 1850 * 1851 * pmap->pm_pteobj must be held and (m) must be spin-locked by the caller. 1852 */ 1853 static void 1854 pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m, 1855 pv_entry_t pv) 1856 { 1857 pv->pv_va = va; 1858 pv->pv_pmap = pmap; 1859 pv->pv_ptem = mpte; 1860 1861 m->md.pv_list_count++; 1862 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 1863 pv = pv_entry_rb_tree_RB_INSERT(&pmap->pm_pvroot, pv); 1864 vm_page_flag_set(m, PG_MAPPED); 1865 KKASSERT(pv == NULL); 1866 } 1867 1868 /* 1869 * pmap_remove_pte: do the things to unmap a page in a process 1870 * 1871 * Caller holds pmap->pm_pteobj and holds the associated page table 1872 * page busy to prevent races. 1873 */ 1874 static int 1875 pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, pt_entry_t oldpte, 1876 vm_offset_t va) 1877 { 1878 vm_page_t m; 1879 int error; 1880 1881 if (ptq) 1882 oldpte = pmap_inval_loadandclear(ptq, pmap, va); 1883 1884 if (oldpte & VPTE_WIRED) 1885 atomic_add_long(&pmap->pm_stats.wired_count, -1); 1886 KKASSERT(pmap->pm_stats.wired_count >= 0); 1887 1888 #if 0 1889 /* 1890 * Machines that don't support invlpg, also don't support 1891 * PG_G. XXX PG_G is disabled for SMP so don't worry about 1892 * the SMP case. 
1893 */ 1894 if (oldpte & PG_G) 1895 cpu_invlpg((void *)va); 1896 #endif 1897 KKASSERT(pmap->pm_stats.resident_count > 0); 1898 atomic_add_long(&pmap->pm_stats.resident_count, -1); 1899 if (oldpte & VPTE_MANAGED) { 1900 m = PHYS_TO_VM_PAGE(oldpte); 1901 1902 /* 1903 * NOTE: pmap_remove_entry() will spin-lock the page 1904 */ 1905 if (oldpte & VPTE_M) { 1906 #if defined(PMAP_DIAGNOSTIC) 1907 if (pmap_nw_modified(oldpte)) { 1908 kprintf("pmap_remove: modified page not " 1909 "writable: va: 0x%lx, pte: 0x%lx\n", 1910 va, oldpte); 1911 } 1912 #endif 1913 if (pmap_track_modified(pmap, va)) 1914 vm_page_dirty(m); 1915 } 1916 if (oldpte & VPTE_A) 1917 vm_page_flag_set(m, PG_REFERENCED); 1918 error = pmap_remove_entry(pmap, m, va); 1919 } else { 1920 error = pmap_unuse_pt(pmap, va, NULL); 1921 } 1922 return error; 1923 } 1924 1925 /* 1926 * pmap_remove_page: 1927 * 1928 * Remove a single page from a process address space. 1929 * 1930 * This function may not be called from an interrupt if the pmap is 1931 * not kernel_pmap. 1932 * 1933 * Caller holds pmap->pm_pteobj 1934 */ 1935 static void 1936 pmap_remove_page(struct pmap *pmap, vm_offset_t va) 1937 { 1938 pt_entry_t *pte; 1939 1940 pte = pmap_pte(pmap, va); 1941 if (pte == NULL) 1942 return; 1943 if ((*pte & VPTE_V) == 0) 1944 return; 1945 pmap_remove_pte(pmap, pte, 0, va); 1946 } 1947 1948 /* 1949 * Remove the given range of addresses from the specified map. 1950 * 1951 * It is assumed that the start and end are properly rounded to 1952 * the page size. 1953 * 1954 * This function may not be called from an interrupt if the pmap is 1955 * not kernel_pmap. 1956 * 1957 * No requirements. 1958 */ 1959 void 1960 pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) 1961 { 1962 vm_offset_t va_next; 1963 pml4_entry_t *pml4e; 1964 pdp_entry_t *pdpe; 1965 pd_entry_t ptpaddr, *pde; 1966 pt_entry_t *pte; 1967 vm_page_t pt_m; 1968 1969 if (pmap == NULL) 1970 return; 1971 1972 vm_object_hold(pmap->pm_pteobj); 1973 KKASSERT(pmap->pm_stats.resident_count >= 0); 1974 if (pmap->pm_stats.resident_count == 0) { 1975 vm_object_drop(pmap->pm_pteobj); 1976 return; 1977 } 1978 1979 /* 1980 * special handling of removing one page. a very 1981 * common operation and easy to short circuit some 1982 * code. 1983 */ 1984 if (sva + PAGE_SIZE == eva) { 1985 pde = pmap_pde(pmap, sva); 1986 if (pde && (*pde & VPTE_PS) == 0) { 1987 pmap_remove_page(pmap, sva); 1988 vm_object_drop(pmap->pm_pteobj); 1989 return; 1990 } 1991 } 1992 1993 for (; sva < eva; sva = va_next) { 1994 pml4e = pmap_pml4e(pmap, sva); 1995 if ((*pml4e & VPTE_V) == 0) { 1996 va_next = (sva + NBPML4) & ~PML4MASK; 1997 if (va_next < sva) 1998 va_next = eva; 1999 continue; 2000 } 2001 2002 pdpe = pmap_pml4e_to_pdpe(pml4e, sva); 2003 if ((*pdpe & VPTE_V) == 0) { 2004 va_next = (sva + NBPDP) & ~PDPMASK; 2005 if (va_next < sva) 2006 va_next = eva; 2007 continue; 2008 } 2009 2010 /* 2011 * Calculate index for next page table. 2012 */ 2013 va_next = (sva + NBPDR) & ~PDRMASK; 2014 if (va_next < sva) 2015 va_next = eva; 2016 2017 pde = pmap_pdpe_to_pde(pdpe, sva); 2018 ptpaddr = *pde; 2019 2020 /* 2021 * Weed out invalid mappings. 2022 */ 2023 if (ptpaddr == 0) 2024 continue; 2025 2026 /* 2027 * Check for large page. 
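		 *
		 * A VPTE_PS entry at the PD level maps a full 2MB (NBPDR)
		 * region directly, which is why resident_count is adjusted
		 * by NBPDR / PAGE_SIZE below instead of walking individual
		 * PTEs.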
2028 */ 2029 if ((ptpaddr & VPTE_PS) != 0) { 2030 /* JG FreeBSD has more complex treatment here */ 2031 KKASSERT(*pde != 0); 2032 pmap_inval_pde(pde, pmap, sva); 2033 atomic_add_long(&pmap->pm_stats.resident_count, 2034 -NBPDR / PAGE_SIZE); 2035 continue; 2036 } 2037 2038 /* 2039 * Limit our scan to either the end of the va represented 2040 * by the current page table page, or to the end of the 2041 * range being removed. 2042 */ 2043 if (va_next > eva) 2044 va_next = eva; 2045 2046 /* 2047 * NOTE: pmap_remove_pte() can block. 2048 */ 2049 pt_m = pmap_hold_pt_page(pde, sva); 2050 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, 2051 sva += PAGE_SIZE) { 2052 if (*pte) { 2053 if (pmap_remove_pte(pmap, pte, 0, sva)) 2054 break; 2055 } 2056 } 2057 vm_page_unhold(pt_m); 2058 } 2059 vm_object_drop(pmap->pm_pteobj); 2060 } 2061 2062 /* 2063 * Removes this physical page from all physical maps in which it resides. 2064 * Reflects back modify bits to the pager. 2065 * 2066 * This routine may not be called from an interrupt. 2067 * 2068 * No requirements. 2069 */ 2070 static void 2071 pmap_remove_all(vm_page_t m) 2072 { 2073 pt_entry_t *pte, tpte; 2074 pv_entry_t pv; 2075 vm_object_t pmobj; 2076 pmap_t pmap; 2077 2078 #if defined(PMAP_DIAGNOSTIC) 2079 /* 2080 * XXX this makes pmap_page_protect(NONE) illegal for non-managed 2081 * pages! 2082 */ 2083 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { 2084 panic("pmap_page_protect: illegal for unmanaged page, va: 0x%08llx", (long long)VM_PAGE_TO_PHYS(m)); 2085 } 2086 #endif 2087 2088 restart: 2089 vm_page_spin_lock(m); 2090 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 2091 pmap = pv->pv_pmap; 2092 pmobj = pmap->pm_pteobj; 2093 2094 /* 2095 * Handle reversed lock ordering 2096 */ 2097 if (vm_object_hold_try(pmobj) == 0) { 2098 refcount_acquire(&pmobj->hold_count); 2099 vm_page_spin_unlock(m); 2100 vm_object_lock(pmobj); 2101 vm_page_spin_lock(m); 2102 if (pv != TAILQ_FIRST(&m->md.pv_list) || 2103 pmap != pv->pv_pmap || 2104 pmobj != pmap->pm_pteobj) { 2105 vm_page_spin_unlock(m); 2106 vm_object_drop(pmobj); 2107 goto restart; 2108 } 2109 } 2110 2111 KKASSERT(pmap->pm_stats.resident_count > 0); 2112 atomic_add_long(&pmap->pm_stats.resident_count, -1); 2113 2114 pte = pmap_pte(pmap, pv->pv_va); 2115 KKASSERT(pte != NULL); 2116 2117 tpte = pmap_inval_loadandclear(pte, pmap, pv->pv_va); 2118 if (tpte & VPTE_WIRED) 2119 atomic_add_long(&pmap->pm_stats.wired_count, -1); 2120 KKASSERT(pmap->pm_stats.wired_count >= 0); 2121 2122 if (tpte & VPTE_A) 2123 vm_page_flag_set(m, PG_REFERENCED); 2124 2125 /* 2126 * Update the vm_page_t clean and reference bits. 
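 *
 * A set VPTE_M bit means the page was modified through this mapping;
 * propagate it to the vm_page_t with vm_page_dirty() (for tracked
 * addresses) before the pv entry is destroyed, so the pageout code
 * does not lose the dirty state.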
2127 */ 2128 if (tpte & VPTE_M) { 2129 #if defined(PMAP_DIAGNOSTIC) 2130 if (pmap_nw_modified(tpte)) { 2131 kprintf( 2132 "pmap_remove_all: modified page not writable: va: 0x%lx, pte: 0x%lx\n", 2133 pv->pv_va, tpte); 2134 } 2135 #endif 2136 if (pmap_track_modified(pmap, pv->pv_va)) 2137 vm_page_dirty(m); 2138 } 2139 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2140 if (TAILQ_EMPTY(&m->md.pv_list)) 2141 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 2142 m->md.pv_list_count--; 2143 KKASSERT(m->md.pv_list_count >= 0); 2144 pv_entry_rb_tree_RB_REMOVE(&pmap->pm_pvroot, pv); 2145 atomic_add_int(&pmap->pm_generation, 1); 2146 vm_page_spin_unlock(m); 2147 pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem); 2148 free_pv_entry(pv); 2149 2150 vm_object_drop(pmobj); 2151 vm_page_spin_lock(m); 2152 } 2153 KKASSERT((m->flags & (PG_MAPPED|PG_WRITEABLE)) == 0); 2154 vm_page_spin_unlock(m); 2155 } 2156 2157 /* 2158 * Removes the page from a particular pmap 2159 */ 2160 void 2161 pmap_remove_specific(pmap_t pmap, vm_page_t m) 2162 { 2163 pt_entry_t *pte, tpte; 2164 pv_entry_t pv; 2165 2166 vm_object_hold(pmap->pm_pteobj); 2167 again: 2168 vm_page_spin_lock(m); 2169 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2170 if (pv->pv_pmap != pmap) 2171 continue; 2172 2173 KKASSERT(pmap->pm_stats.resident_count > 0); 2174 atomic_add_long(&pmap->pm_stats.resident_count, -1); 2175 2176 pte = pmap_pte(pmap, pv->pv_va); 2177 KKASSERT(pte != NULL); 2178 2179 tpte = pmap_inval_loadandclear(pte, pmap, pv->pv_va); 2180 if (tpte & VPTE_WIRED) 2181 atomic_add_long(&pmap->pm_stats.wired_count, -1); 2182 KKASSERT(pmap->pm_stats.wired_count >= 0); 2183 2184 if (tpte & VPTE_A) 2185 vm_page_flag_set(m, PG_REFERENCED); 2186 2187 /* 2188 * Update the vm_page_t clean and reference bits. 2189 */ 2190 if (tpte & VPTE_M) { 2191 if (pmap_track_modified(pmap, pv->pv_va)) 2192 vm_page_dirty(m); 2193 } 2194 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2195 pv_entry_rb_tree_RB_REMOVE(&pmap->pm_pvroot, pv); 2196 atomic_add_int(&pmap->pm_generation, 1); 2197 m->md.pv_list_count--; 2198 KKASSERT(m->md.pv_list_count >= 0); 2199 if (TAILQ_EMPTY(&m->md.pv_list)) 2200 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 2201 pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem); 2202 vm_page_spin_unlock(m); 2203 free_pv_entry(pv); 2204 goto again; 2205 } 2206 vm_page_spin_unlock(m); 2207 vm_object_drop(pmap->pm_pteobj); 2208 } 2209 2210 /* 2211 * Set the physical protection on the specified range of this map 2212 * as requested. 2213 * 2214 * This function may not be called from an interrupt if the map is 2215 * not the kernel_pmap. 2216 * 2217 * No requirements. 
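 *
 * Removing all read access degenerates into a pmap_remove() of the
 * range.  Requests that add write permission are ignored.  Otherwise
 * each PTE in the range is cleaned and write-protected via
 * pmap_clean_pte().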
 */
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
	vm_offset_t va_next;
	pml4_entry_t *pml4e;
	pdp_entry_t *pdpe;
	pd_entry_t ptpaddr, *pde;
	pt_entry_t *pte;
	vm_page_t pt_m;

	if (pmap == NULL)
		return;

	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		pmap_remove(pmap, sva, eva);
		return;
	}

	if (prot & VM_PROT_WRITE)
		return;

	vm_object_hold(pmap->pm_pteobj);

	for (; sva < eva; sva = va_next) {
		pml4e = pmap_pml4e(pmap, sva);
		if ((*pml4e & VPTE_V) == 0) {
			va_next = (sva + NBPML4) & ~PML4MASK;
			if (va_next < sva)
				va_next = eva;
			continue;
		}

		pdpe = pmap_pml4e_to_pdpe(pml4e, sva);
		if ((*pdpe & VPTE_V) == 0) {
			va_next = (sva + NBPDP) & ~PDPMASK;
			if (va_next < sva)
				va_next = eva;
			continue;
		}

		va_next = (sva + NBPDR) & ~PDRMASK;
		if (va_next < sva)
			va_next = eva;

		pde = pmap_pdpe_to_pde(pdpe, sva);
		ptpaddr = *pde;

#if 0
		/*
		 * Check for large page.
		 */
		if ((ptpaddr & VPTE_PS) != 0) {
			/* JG correct? */
			pmap_clean_pde(pde, pmap, sva);
			atomic_add_long(&pmap->pm_stats.resident_count,
					-NBPDR / PAGE_SIZE);
			continue;
		}
#endif

		/*
		 * Weed out invalid mappings. Note: we assume that the page
		 * directory table is always allocated, and in kernel virtual.
		 */
		if (ptpaddr == 0)
			continue;

		if (va_next > eva)
			va_next = eva;

		pt_m = pmap_hold_pt_page(pde, sva);
		for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
		    sva += PAGE_SIZE) {
			/*
			 * Clean managed pages and also check the accessed
			 * bit.  Just remove write perms for unmanaged
			 * pages.  Be careful of races, turning off write
			 * access will force a fault rather than setting
			 * the modified bit at an unexpected time.
			 */
			pmap_clean_pte(pte, pmap, sva, NULL);
		}
		vm_page_unhold(pt_m);
	}
	vm_object_drop(pmap->pm_pteobj);
}

/*
 * Enter a managed page into a pmap.  If the page is not wired, the related
 * pmap data can be destroyed at any time and re-entered later on demand.
 *
 * Insert the vm_page (m) at virtual address (v) in (pmap), with the
 * specified protection, and wire the mapping if requested.
 *
 * NOTE: This routine may not lazy-evaluate or lose information.  The
 * page must actually be inserted into the given map NOW.
 *
 * NOTE: When entering a page at a KVA address, the pmap must be the
 * kernel_pmap.
 *
 * No requirements.
 */
void
pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
	   boolean_t wired, vm_map_entry_t entry __unused)
{
	vm_paddr_t pa;
	pv_entry_t pv;
	pt_entry_t *pte;
	pt_entry_t origpte, newpte;
	vm_paddr_t opa;
	vm_page_t mpte;

	if (pmap == NULL)
		return;

	va = trunc_page(va);

	vm_object_hold(pmap->pm_pteobj);

	/*
	 * Get the page table page.  The kernel_pmap's page table pages
	 * are preallocated and have no associated vm_page_t.
	 *
	 * If not NULL, mpte will be busied and we must vm_page_wakeup()
	 * to cleanup.  There will already be at least one wire count from
	 * it being mapped into its parent.
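	 *
	 * The kernel_pmap's PTEs are reached directly via vtopte() since
	 * its page tables are preallocated.  User pmaps allocate (or find)
	 * the page table page with pmap_allocpte() and index into it
	 * through the DMAP.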
	 */
	if (pmap == &kernel_pmap) {
		mpte = NULL;
		pte = vtopte(va);
	} else {
		mpte = pmap_allocpte(pmap, va);
		pte = (void *)PHYS_TO_DMAP(mpte->phys_addr);
		pte += pmap_pte_index(va);
	}

	/*
	 * Deal with races against the kernel's real MMU by cleaning the
	 * page, even if we are re-entering the same page.
	 */
	pa = VM_PAGE_TO_PHYS(m);
	origpte = pmap_inval_loadandclear(pte, pmap, va);
	/*origpte = pmap_clean_pte(pte, pmap, va, NULL);*/
	opa = origpte & VPTE_FRAME;

	if (origpte & VPTE_PS)
		panic("pmap_enter: attempted pmap_enter on 2MB page");

	if ((origpte & (VPTE_MANAGED|VPTE_M)) == (VPTE_MANAGED|VPTE_M)) {
		if (pmap_track_modified(pmap, va)) {
			vm_page_t om = PHYS_TO_VM_PAGE(opa);
			vm_page_dirty(om);
		}
	}

	/*
	 * Mapping has not changed, must be protection or wiring change.
	 */
	if (origpte && (opa == pa)) {
		/*
		 * Wiring change, just update stats.  We don't worry about
		 * wiring PT pages as they remain resident as long as there
		 * are valid mappings in them.  Hence, if a user page is
		 * wired, the PT page will be also.
		 */
		if (wired && ((origpte & VPTE_WIRED) == 0))
			atomic_add_long(&pmap->pm_stats.wired_count, 1);
		else if (!wired && (origpte & VPTE_WIRED))
			atomic_add_long(&pmap->pm_stats.wired_count, -1);

		if (origpte & VPTE_MANAGED) {
			pa |= VPTE_MANAGED;
			KKASSERT(m->flags & PG_MAPPED);
			KKASSERT(!(m->flags & (PG_FICTITIOUS|PG_UNMANAGED)));
		} else {
			KKASSERT((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)));
		}
		vm_page_spin_lock(m);
		goto validate;
	}

	/*
	 * Bump the wire_count for the page table page.
	 */
	if (mpte)
		vm_page_wire_quick(mpte);

	/*
	 * Mapping has changed, invalidate old range and fall through to
	 * handle validating new mapping.  Don't inherit anything from
	 * oldpte.
	 */
	if (opa) {
		int err;
		err = pmap_remove_pte(pmap, NULL, origpte, va);
		origpte = 0;
		if (err)
			panic("pmap_enter: pte vanished, va: 0x%lx", va);
	}

	/*
	 * Enter on the PV list if part of our managed memory.  Note that we
	 * raise IPL while manipulating pv_table since pmap_enter can be
	 * called at interrupt time.
	 */
	if (pmap_initialized) {
		if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
			/*
			 * WARNING! We are using m's spin-lock as a poor
			 * man's pte lock to interlock against
			 * pmap_page_protect() operations.
			 *
			 * This is a bad hack (obviously).
			 */
			pv = get_pv_entry();
			vm_page_spin_lock(m);
			pmap_insert_entry(pmap, va, mpte, m, pv);
			pa |= VPTE_MANAGED;
			/* vm_page_spin_unlock(m); */
		} else {
			vm_page_spin_lock(m);
		}
	} else {
		vm_page_spin_lock(m);
	}

	/*
	 * Increment counters
	 */
	atomic_add_long(&pmap->pm_stats.resident_count, 1);
	if (wired)
		atomic_add_long(&pmap->pm_stats.wired_count, 1);

validate:
	/*
	 * Now validate mapping with desired protection/wiring.
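	 *
	 * The new entry is assembled roughly as
	 *
	 *	newpte = pa | pte_prot(pmap, prot) | VPTE_V | VPTE_U | VPTE_A;
	 *	if (wired)
	 *		newpte |= VPTE_WIRED;
	 *
	 * and installed with an atomic swap.  If the swap races the real
	 * kernel setting VPTE_M on the old entry, the bit is re-set on the
	 * new entry so the modification is not lost.  (Illustrative sketch
	 * only; the code below is authoritative.)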
2456 */ 2457 newpte = (pt_entry_t)(pa | pte_prot(pmap, prot) | VPTE_V | VPTE_U); 2458 newpte |= VPTE_A; 2459 2460 if (wired) 2461 newpte |= VPTE_WIRED; 2462 // if (pmap != &kernel_pmap) 2463 newpte |= VPTE_U; 2464 if (newpte & VPTE_RW) 2465 vm_page_flag_set(m, PG_WRITEABLE); 2466 KKASSERT((newpte & VPTE_MANAGED) == 0 || (m->flags & PG_MAPPED)); 2467 2468 origpte = atomic_swap_long(pte, newpte); 2469 if (origpte & VPTE_M) { 2470 kprintf("pmap [M] race @ %016jx\n", va); 2471 atomic_set_long(pte, VPTE_M); 2472 } 2473 vm_page_spin_unlock(m); 2474 2475 if (mpte) 2476 vm_page_wakeup(mpte); 2477 vm_object_drop(pmap->pm_pteobj); 2478 } 2479 2480 /* 2481 * This code works like pmap_enter() but assumes VM_PROT_READ and not-wired. 2482 * 2483 * Currently this routine may only be used on user pmaps, not kernel_pmap. 2484 * 2485 * No requirements. 2486 */ 2487 void 2488 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m) 2489 { 2490 pmap_enter(pmap, va, m, VM_PROT_READ, 0, NULL); 2491 } 2492 2493 /* 2494 * Make a temporary mapping for a physical address. This is only intended 2495 * to be used for panic dumps. 2496 * 2497 * The caller is responsible for calling smp_invltlb(). 2498 */ 2499 void * 2500 pmap_kenter_temporary(vm_paddr_t pa, long i) 2501 { 2502 pmap_kenter_quick(crashdumpmap + (i * PAGE_SIZE), pa); 2503 return ((void *)crashdumpmap); 2504 } 2505 2506 #define MAX_INIT_PT (96) 2507 2508 /* 2509 * This routine preloads the ptes for a given object into the specified pmap. 2510 * This eliminates the blast of soft faults on process startup and 2511 * immediately after an mmap. 2512 * 2513 * No requirements. 2514 */ 2515 static int pmap_object_init_pt_callback(vm_page_t p, void *data); 2516 2517 void 2518 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot, 2519 vm_object_t object, vm_pindex_t pindex, 2520 vm_size_t size, int limit) 2521 { 2522 struct rb_vm_page_scan_info info; 2523 struct lwp *lp; 2524 vm_size_t psize; 2525 2526 /* 2527 * We can't preinit if read access isn't set or there is no pmap 2528 * or object. 2529 */ 2530 if ((prot & VM_PROT_READ) == 0 || pmap == NULL || object == NULL) 2531 return; 2532 2533 /* 2534 * We can't preinit if the pmap is not the current pmap 2535 */ 2536 lp = curthread->td_lwp; 2537 if (lp == NULL || pmap != vmspace_pmap(lp->lwp_vmspace)) 2538 return; 2539 2540 /* 2541 * Misc additional checks 2542 */ 2543 psize = x86_64_btop(size); 2544 2545 if ((object->type != OBJT_VNODE) || 2546 ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) && 2547 (object->resident_page_count > MAX_INIT_PT))) { 2548 return; 2549 } 2550 2551 if (psize + pindex > object->size) { 2552 if (object->size < pindex) 2553 return; 2554 psize = object->size - pindex; 2555 } 2556 2557 if (psize == 0) 2558 return; 2559 2560 /* 2561 * Use a red-black scan to traverse the requested range and load 2562 * any valid pages found into the pmap. 2563 * 2564 * We cannot safely scan the object's memq unless we are in a 2565 * critical section since interrupts can remove pages from objects. 
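	 *
	 * Only pages whose pindex falls within [pindex, pindex + psize - 1]
	 * are visited; each fully valid, non-fictitious page found is
	 * entered read-only via pmap_enter_quick().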
2566 */ 2567 info.start_pindex = pindex; 2568 info.end_pindex = pindex + psize - 1; 2569 info.limit = limit; 2570 info.mpte = NULL; 2571 info.addr = addr; 2572 info.pmap = pmap; 2573 2574 vm_object_hold_shared(object); 2575 vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp, 2576 pmap_object_init_pt_callback, &info); 2577 vm_object_drop(object); 2578 } 2579 2580 static 2581 int 2582 pmap_object_init_pt_callback(vm_page_t p, void *data) 2583 { 2584 struct rb_vm_page_scan_info *info = data; 2585 vm_pindex_t rel_index; 2586 /* 2587 * don't allow an madvise to blow away our really 2588 * free pages allocating pv entries. 2589 */ 2590 if ((info->limit & MAP_PREFAULT_MADVISE) && 2591 vmstats.v_free_count < vmstats.v_free_reserved) { 2592 return(-1); 2593 } 2594 2595 /* 2596 * Ignore list markers and ignore pages we cannot instantly 2597 * busy (while holding the object token). 2598 */ 2599 if (p->flags & PG_MARKER) 2600 return 0; 2601 if (vm_page_busy_try(p, TRUE)) 2602 return 0; 2603 if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && 2604 (p->flags & PG_FICTITIOUS) == 0) { 2605 if ((p->queue - p->pc) == PQ_CACHE) 2606 vm_page_deactivate(p); 2607 rel_index = p->pindex - info->start_pindex; 2608 pmap_enter_quick(info->pmap, 2609 info->addr + x86_64_ptob(rel_index), p); 2610 } 2611 vm_page_wakeup(p); 2612 return(0); 2613 } 2614 2615 /* 2616 * Return TRUE if the pmap is in shape to trivially 2617 * pre-fault the specified address. 2618 * 2619 * Returns FALSE if it would be non-trivial or if a 2620 * pte is already loaded into the slot. 2621 * 2622 * No requirements. 2623 */ 2624 int 2625 pmap_prefault_ok(pmap_t pmap, vm_offset_t addr) 2626 { 2627 pt_entry_t *pte; 2628 pd_entry_t *pde; 2629 int ret; 2630 2631 vm_object_hold(pmap->pm_pteobj); 2632 pde = pmap_pde(pmap, addr); 2633 if (pde == NULL || *pde == 0) { 2634 ret = 0; 2635 } else { 2636 pte = pmap_pde_to_pte(pde, addr); 2637 ret = (*pte) ? 0 : 1; 2638 } 2639 vm_object_drop(pmap->pm_pteobj); 2640 2641 return (ret); 2642 } 2643 2644 /* 2645 * Change the wiring attribute for a map/virtual-address pair. 2646 * 2647 * The mapping must already exist in the pmap. 2648 * No other requirements. 2649 */ 2650 vm_page_t 2651 pmap_unwire(pmap_t pmap, vm_offset_t va) 2652 { 2653 pt_entry_t *pte; 2654 vm_paddr_t pa; 2655 vm_page_t m; 2656 2657 if (pmap == NULL) 2658 return NULL; 2659 2660 vm_object_hold(pmap->pm_pteobj); 2661 pte = pmap_pte(pmap, va); 2662 2663 if (pte == NULL || (*pte & VPTE_V) == 0) { 2664 vm_object_drop(pmap->pm_pteobj); 2665 return NULL; 2666 } 2667 2668 /* 2669 * Wiring is not a hardware characteristic so there is no need to 2670 * invalidate TLB. However, in an SMP environment we must use 2671 * a locked bus cycle to update the pte (if we are not using 2672 * the pmap_inval_*() API that is)... it's ok to do this for simple 2673 * wiring changes. 2674 */ 2675 if (pmap_pte_w(pte)) 2676 atomic_add_long(&pmap->pm_stats.wired_count, -1); 2677 /* XXX else return NULL so caller doesn't unwire m ? */ 2678 atomic_clear_long(pte, VPTE_WIRED); 2679 2680 pa = *pte & VPTE_FRAME; 2681 m = PHYS_TO_VM_PAGE(pa); /* held by wired count */ 2682 2683 vm_object_drop(pmap->pm_pteobj); 2684 2685 return m; 2686 } 2687 2688 /* 2689 * Copy the range specified by src_addr/len 2690 * from the source map to the range dst_addr/len 2691 * in the destination map. 2692 * 2693 * This routine is only advisory and need not do anything. 
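 *
 * In this vkernel implementation it is a no-op; see the FIXME in the
 * function body.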
 */
void
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
	  vm_size_t len, vm_offset_t src_addr)
{
	/*
	 * XXX BUGGY.  Among other things srcmpte is assumed to remain
	 * valid through blocking calls, and that's just not going to
	 * be the case.
	 *
	 * FIXME!
	 */
	return;
}

/*
 * pmap_zero_page:
 *
 * Zero the specified physical page.
 *
 * This function may be called from an interrupt and no locking is
 * required.
 */
void
pmap_zero_page(vm_paddr_t phys)
{
	vm_offset_t va = PHYS_TO_DMAP(phys);

	bzero((void *)va, PAGE_SIZE);
}

/*
 * pmap_zero_page_area:
 *
 * Zero part of a physical page by mapping it into memory and clearing
 * its contents with bzero.
 *
 * off and size may not cover an area beyond a single hardware page.
 */
void
pmap_zero_page_area(vm_paddr_t phys, int off, int size)
{
	vm_offset_t virt = PHYS_TO_DMAP(phys);

	bzero((char *)virt + off, size);
}

/*
 * pmap_copy_page:
 *
 * Copy the physical page from the source PA to the target PA.
 * This function may be called from an interrupt.  No locking
 * is required.
 */
void
pmap_copy_page(vm_paddr_t src, vm_paddr_t dst)
{
	vm_offset_t src_virt, dst_virt;

	src_virt = PHYS_TO_DMAP(src);
	dst_virt = PHYS_TO_DMAP(dst);
	bcopy((void *)src_virt, (void *)dst_virt, PAGE_SIZE);
}

/*
 * pmap_copy_page_frag:
 *
 * Copy part of the physical page from the source PA to the target PA.
 * This function may be called from an interrupt.  No locking
 * is required.
 */
void
pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes)
{
	vm_offset_t src_virt, dst_virt;

	src_virt = PHYS_TO_DMAP(src);
	dst_virt = PHYS_TO_DMAP(dst);
	bcopy((char *)src_virt + (src & PAGE_MASK),
	      (char *)dst_virt + (dst & PAGE_MASK),
	      bytes);
}

/*
 * Returns true if the pmap's pv is one of the first 16 pvs linked to
 * from this page.  This count may be changed upwards or downwards
 * in the future; it is only necessary that true be returned for a small
 * subset of pmaps for proper page aging.
 *
 * No other requirements.
 */
boolean_t
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
{
	pv_entry_t pv;
	int loops = 0;

	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
		return FALSE;

	vm_page_spin_lock(m);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		if (pv->pv_pmap == pmap) {
			vm_page_spin_unlock(m);
			return TRUE;
		}
		loops++;
		if (loops >= 16)
			break;
	}
	vm_page_spin_unlock(m);

	return (FALSE);
}

/*
 * Remove all pages from the specified address space; this aids process
 * exit speeds.  Also, this code is special cased for the current
 * process only, but can have the more generic (and slightly slower)
 * mode enabled.  This is much faster than pmap_remove in the case
 * of running down an entire address space.
 *
 * No other requirements.
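 *
 * In this vkernel implementation the routine simply forwards to
 * pmap_remove(); the older pv-list based scan is retained below
 * under #if 0.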
2817 */ 2818 void 2819 pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 2820 { 2821 pmap_remove(pmap, sva, eva); 2822 #if 0 2823 pt_entry_t *pte, tpte; 2824 pv_entry_t pv, npv; 2825 vm_page_t m; 2826 int save_generation; 2827 2828 if (pmap->pm_pteobj) 2829 vm_object_hold(pmap->pm_pteobj); 2830 2831 pmap_invalidate_range(pmap, sva, eva); 2832 2833 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { 2834 if (pv->pv_va >= eva || pv->pv_va < sva) { 2835 npv = TAILQ_NEXT(pv, pv_plist); 2836 continue; 2837 } 2838 2839 KKASSERT(pmap == pv->pv_pmap); 2840 2841 pte = pmap_pte(pmap, pv->pv_va); 2842 2843 /* 2844 * We cannot remove wired pages from a process' mapping 2845 * at this time 2846 */ 2847 if (*pte & VPTE_WIRED) { 2848 npv = TAILQ_NEXT(pv, pv_plist); 2849 continue; 2850 } 2851 tpte = pmap_inval_loadandclear(pte, pmap, pv->pv_va); 2852 2853 m = PHYS_TO_VM_PAGE(tpte & VPTE_FRAME); 2854 vm_page_spin_lock(m); 2855 2856 KASSERT(m < &vm_page_array[vm_page_array_size], 2857 ("pmap_remove_pages: bad tpte %lx", tpte)); 2858 2859 KKASSERT(pmap->pm_stats.resident_count > 0); 2860 atomic_add_long(&pmap->pm_stats.resident_count, -1); 2861 2862 /* 2863 * Update the vm_page_t clean and reference bits. 2864 */ 2865 if (tpte & VPTE_M) { 2866 vm_page_dirty(m); 2867 } 2868 2869 npv = TAILQ_NEXT(pv, pv_plist); 2870 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 2871 atomic_add_int(&pmap->pm_generation, 1); 2872 save_generation = pmap->pm_generation; 2873 m->md.pv_list_count--; 2874 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2875 if (TAILQ_EMPTY(&m->md.pv_list)) 2876 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 2877 vm_page_spin_unlock(m); 2878 2879 pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem); 2880 free_pv_entry(pv); 2881 2882 /* 2883 * Restart the scan if we blocked during the unuse or free 2884 * calls and other removals were made. 2885 */ 2886 if (save_generation != pmap->pm_generation) { 2887 kprintf("Warning: pmap_remove_pages race-A avoided\n"); 2888 npv = TAILQ_FIRST(&pmap->pm_pvlist); 2889 } 2890 } 2891 if (pmap->pm_pteobj) 2892 vm_object_drop(pmap->pm_pteobj); 2893 pmap_remove(pmap, sva, eva); 2894 #endif 2895 } 2896 2897 /* 2898 * pmap_testbit tests bits in active mappings of a VM page. 2899 */ 2900 static boolean_t 2901 pmap_testbit(vm_page_t m, int bit) 2902 { 2903 pv_entry_t pv; 2904 pt_entry_t *pte; 2905 2906 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 2907 return FALSE; 2908 2909 if (TAILQ_FIRST(&m->md.pv_list) == NULL) 2910 return FALSE; 2911 2912 vm_page_spin_lock(m); 2913 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2914 /* 2915 * if the bit being tested is the modified bit, then 2916 * mark clean_map and ptes as never 2917 * modified. 2918 */ 2919 if (bit & (VPTE_A|VPTE_M)) { 2920 if (!pmap_track_modified(pv->pv_pmap, pv->pv_va)) 2921 continue; 2922 } 2923 2924 #if defined(PMAP_DIAGNOSTIC) 2925 if (pv->pv_pmap == NULL) { 2926 kprintf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va); 2927 continue; 2928 } 2929 #endif 2930 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2931 if (*pte & bit) { 2932 vm_page_spin_unlock(m); 2933 return TRUE; 2934 } 2935 } 2936 vm_page_spin_unlock(m); 2937 return (FALSE); 2938 } 2939 2940 /* 2941 * This routine is used to clear bits in ptes. Certain bits require special 2942 * handling, in particular (on virtual kernels) the VPTE_M (modify) bit. 2943 * 2944 * This routine is only called with certain VPTE_* bit combinations. 
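 *
 * In particular, clearing VPTE_RW also synchronizes and clears VPTE_M
 * via pmap_clean_pte(), and clearing VPTE_M by itself must invalidate
 * the real kernel's pte so that a later write takes a new fault and
 * re-sets the bit.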
2945 */ 2946 static __inline void 2947 pmap_clearbit(vm_page_t m, int bit) 2948 { 2949 pv_entry_t pv; 2950 pt_entry_t *pte; 2951 pt_entry_t pbits; 2952 vm_object_t pmobj; 2953 pmap_t pmap; 2954 2955 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { 2956 if (bit == VPTE_RW) 2957 vm_page_flag_clear(m, PG_WRITEABLE); 2958 return; 2959 } 2960 2961 /* 2962 * Loop over all current mappings setting/clearing as appropos If 2963 * setting RO do we need to clear the VAC? 2964 */ 2965 restart: 2966 vm_page_spin_lock(m); 2967 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2968 /* 2969 * Need the pmap object lock(?) 2970 */ 2971 pmap = pv->pv_pmap; 2972 pmobj = pmap->pm_pteobj; 2973 2974 if (vm_object_hold_try(pmobj) == 0) { 2975 refcount_acquire(&pmobj->hold_count); 2976 vm_page_spin_unlock(m); 2977 vm_object_lock(pmobj); 2978 vm_object_drop(pmobj); 2979 goto restart; 2980 } 2981 2982 /* 2983 * don't write protect pager mappings 2984 */ 2985 if (bit == VPTE_RW) { 2986 if (!pmap_track_modified(pv->pv_pmap, pv->pv_va)) { 2987 vm_object_drop(pmobj); 2988 continue; 2989 } 2990 } 2991 2992 #if defined(PMAP_DIAGNOSTIC) 2993 if (pv->pv_pmap == NULL) { 2994 kprintf("Null pmap (cb) at va: 0x%lx\n", pv->pv_va); 2995 vm_object_drop(pmobj); 2996 continue; 2997 } 2998 #endif 2999 3000 /* 3001 * Careful here. We can use a locked bus instruction to 3002 * clear VPTE_A or VPTE_M safely but we need to synchronize 3003 * with the target cpus when we mess with VPTE_RW. 3004 * 3005 * On virtual kernels we must force a new fault-on-write 3006 * in the real kernel if we clear the Modify bit ourselves, 3007 * otherwise the real kernel will not get a new fault and 3008 * will never set our Modify bit again. 3009 */ 3010 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 3011 if (*pte & bit) { 3012 if (bit == VPTE_RW) { 3013 /* 3014 * We must also clear VPTE_M when clearing 3015 * VPTE_RW and synchronize its state to 3016 * the page. 3017 */ 3018 pbits = pmap_clean_pte(pte, pv->pv_pmap, 3019 pv->pv_va, m); 3020 } else if (bit == VPTE_M) { 3021 /* 3022 * We must invalidate the real-kernel pte 3023 * when clearing VPTE_M bit to force the 3024 * real-kernel to take a new fault to re-set 3025 * VPTE_M. 3026 */ 3027 atomic_clear_long(pte, VPTE_M); 3028 if (*pte & VPTE_RW) { 3029 pmap_invalidate_range(pv->pv_pmap, 3030 pv->pv_va, 3031 pv->pv_va + PAGE_SIZE); 3032 } 3033 } else if ((bit & (VPTE_RW|VPTE_M)) == 3034 (VPTE_RW|VPTE_M)) { 3035 /* 3036 * We've been asked to clear W & M, I guess 3037 * the caller doesn't want us to update 3038 * the dirty status of the VM page. 3039 */ 3040 pmap_clean_pte(pte, pv->pv_pmap, pv->pv_va, m); 3041 panic("shouldn't be called"); 3042 } else { 3043 /* 3044 * We've been asked to clear bits that do 3045 * not interact with hardware. 3046 */ 3047 atomic_clear_long(pte, bit); 3048 } 3049 } 3050 vm_object_drop(pmobj); 3051 } 3052 if (bit == VPTE_RW) 3053 vm_page_flag_clear(m, PG_WRITEABLE); 3054 vm_page_spin_unlock(m); 3055 } 3056 3057 /* 3058 * Lower the permission for all mappings to a given page. 3059 * 3060 * No other requirements. 3061 */ 3062 void 3063 pmap_page_protect(vm_page_t m, vm_prot_t prot) 3064 { 3065 /* JG NX support? */ 3066 if ((prot & VM_PROT_WRITE) == 0) { 3067 if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { 3068 pmap_clearbit(m, VPTE_RW); 3069 } else { 3070 pmap_remove_all(m); 3071 } 3072 } 3073 } 3074 3075 vm_paddr_t 3076 pmap_phys_address(vm_pindex_t ppn) 3077 { 3078 return (x86_64_ptob(ppn)); 3079 } 3080 3081 /* 3082 * Return a count of reference bits for a page, clearing those bits. 
3083 * It is not necessary for every reference bit to be cleared, but it 3084 * is necessary that 0 only be returned when there are truly no 3085 * reference bits set. 3086 * 3087 * XXX: The exact number of bits to check and clear is a matter that 3088 * should be tested and standardized at some point in the future for 3089 * optimal aging of shared pages. 3090 * 3091 * No other requirements. 3092 */ 3093 int 3094 pmap_ts_referenced(vm_page_t m) 3095 { 3096 pv_entry_t pv, pvf, pvn; 3097 pt_entry_t *pte; 3098 int rtval = 0; 3099 3100 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 3101 return (rtval); 3102 3103 vm_page_spin_lock(m); 3104 if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 3105 pvf = pv; 3106 do { 3107 pvn = TAILQ_NEXT(pv, pv_list); 3108 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 3109 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 3110 3111 if (!pmap_track_modified(pv->pv_pmap, pv->pv_va)) 3112 continue; 3113 3114 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 3115 3116 if (pte && (*pte & VPTE_A)) { 3117 atomic_clear_long(pte, VPTE_A); 3118 rtval++; 3119 if (rtval > 4) { 3120 break; 3121 } 3122 } 3123 } while ((pv = pvn) != NULL && pv != pvf); 3124 } 3125 vm_page_spin_unlock(m); 3126 3127 return (rtval); 3128 } 3129 3130 /* 3131 * Return whether or not the specified physical page was modified 3132 * in any physical maps. 3133 * 3134 * No other requirements. 3135 */ 3136 boolean_t 3137 pmap_is_modified(vm_page_t m) 3138 { 3139 boolean_t res; 3140 3141 res = pmap_testbit(m, VPTE_M); 3142 3143 return (res); 3144 } 3145 3146 /* 3147 * Clear the modify bits on the specified physical page. For the vkernel 3148 * we really need to clean the page, which clears VPTE_RW and VPTE_M, in 3149 * order to ensure that we take a fault on the next write to the page. 3150 * Otherwise the page may become dirty without us knowing it. 3151 * 3152 * No other requirements. 3153 */ 3154 void 3155 pmap_clear_modify(vm_page_t m) 3156 { 3157 pmap_clearbit(m, VPTE_RW); 3158 } 3159 3160 /* 3161 * Clear the reference bit on the specified physical page. 3162 * 3163 * No other requirements. 3164 */ 3165 void 3166 pmap_clear_reference(vm_page_t m) 3167 { 3168 pmap_clearbit(m, VPTE_A); 3169 } 3170 3171 /* 3172 * Miscellaneous support routines follow 3173 */ 3174 3175 static void 3176 i386_protection_init(void) 3177 { 3178 int *kp, prot; 3179 3180 kp = protection_codes; 3181 for (prot = 0; prot < 8; prot++) { 3182 if (prot & VM_PROT_READ) 3183 *kp |= 0; /* if it's VALID is readeable */ 3184 if (prot & VM_PROT_WRITE) 3185 *kp |= VPTE_RW; 3186 if (prot & VM_PROT_EXECUTE) 3187 *kp |= 0; /* if it's VALID is executable */ 3188 ++kp; 3189 } 3190 } 3191 3192 /* 3193 * Sets the memory attribute for the specified page. 3194 */ 3195 void 3196 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) 3197 { 3198 /* This is a vkernel, do nothing */ 3199 } 3200 3201 /* 3202 * Change the PAT attribute on an existing kernel memory map. Caller 3203 * must ensure that the virtual memory in question is not accessed 3204 * during the adjustment. 3205 */ 3206 void 3207 pmap_change_attr(vm_offset_t va, vm_size_t count, int mode) 3208 { 3209 /* This is a vkernel, do nothing */ 3210 } 3211 3212 /* 3213 * Perform the pmap work for mincore 3214 * 3215 * No other requirements. 
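 *
 * The returned value is a mask of MINCORE_* bits.  A hypothetical
 * caller might do, for example:
 *
 *	val = pmap_mincore(pmap, addr);
 *	if (val & MINCORE_INCORE)
 *		... page is resident ...
 *
 * (illustrative only).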
3216 */ 3217 int 3218 pmap_mincore(pmap_t pmap, vm_offset_t addr) 3219 { 3220 pt_entry_t *ptep, pte; 3221 vm_page_t m; 3222 int val = 0; 3223 3224 vm_object_hold(pmap->pm_pteobj); 3225 ptep = pmap_pte(pmap, addr); 3226 3227 if (ptep && (pte = *ptep) != 0) { 3228 vm_paddr_t pa; 3229 3230 val = MINCORE_INCORE; 3231 if ((pte & VPTE_MANAGED) == 0) 3232 goto done; 3233 3234 pa = pte & VPTE_FRAME; 3235 3236 m = PHYS_TO_VM_PAGE(pa); 3237 3238 /* 3239 * Modified by us 3240 */ 3241 if (pte & VPTE_M) 3242 val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; 3243 /* 3244 * Modified by someone 3245 */ 3246 else if (m->dirty || pmap_is_modified(m)) 3247 val |= MINCORE_MODIFIED_OTHER; 3248 /* 3249 * Referenced by us 3250 */ 3251 if (pte & VPTE_A) 3252 val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; 3253 3254 /* 3255 * Referenced by someone 3256 */ 3257 else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) { 3258 val |= MINCORE_REFERENCED_OTHER; 3259 vm_page_flag_set(m, PG_REFERENCED); 3260 } 3261 } 3262 done: 3263 vm_object_drop(pmap->pm_pteobj); 3264 3265 return val; 3266 } 3267 3268 /* 3269 * Replace p->p_vmspace with a new one. If adjrefs is non-zero the new 3270 * vmspace will be ref'd and the old one will be deref'd. 3271 * 3272 * Caller must hold vmspace->vm_map.token for oldvm and newvm 3273 */ 3274 void 3275 pmap_replacevm(struct proc *p, struct vmspace *newvm, int adjrefs) 3276 { 3277 struct vmspace *oldvm; 3278 struct lwp *lp; 3279 3280 oldvm = p->p_vmspace; 3281 if (oldvm != newvm) { 3282 if (adjrefs) 3283 vmspace_ref(newvm); 3284 KKASSERT((newvm->vm_refcnt & VM_REF_DELETED) == 0); 3285 p->p_vmspace = newvm; 3286 KKASSERT(p->p_nthreads == 1); 3287 lp = RB_ROOT(&p->p_lwp_tree); 3288 pmap_setlwpvm(lp, newvm); 3289 if (adjrefs) 3290 vmspace_rel(oldvm); 3291 } 3292 } 3293 3294 /* 3295 * Set the vmspace for a LWP. The vmspace is almost universally set the 3296 * same as the process vmspace, but virtual kernels need to swap out contexts 3297 * on a per-lwp basis. 3298 */ 3299 void 3300 pmap_setlwpvm(struct lwp *lp, struct vmspace *newvm) 3301 { 3302 struct vmspace *oldvm; 3303 struct pmap *pmap; 3304 3305 oldvm = lp->lwp_vmspace; 3306 if (oldvm != newvm) { 3307 crit_enter(); 3308 KKASSERT((newvm->vm_refcnt & VM_REF_DELETED) == 0); 3309 lp->lwp_vmspace = newvm; 3310 if (curthread->td_lwp == lp) { 3311 pmap = vmspace_pmap(newvm); 3312 ATOMIC_CPUMASK_ORBIT(pmap->pm_active, mycpu->gd_cpuid); 3313 if (pmap->pm_active_lock & CPULOCK_EXCL) 3314 pmap_interlock_wait(newvm); 3315 #if defined(SWTCH_OPTIM_STATS) 3316 tlb_flush_count++; 3317 #endif 3318 pmap = vmspace_pmap(oldvm); 3319 ATOMIC_CPUMASK_NANDBIT(pmap->pm_active, 3320 mycpu->gd_cpuid); 3321 } 3322 crit_exit(); 3323 } 3324 } 3325 3326 /* 3327 * The swtch code tried to switch in a heavy weight process whos pmap 3328 * is locked by another cpu. We have to wait for the lock to clear before 3329 * the pmap can be used. 
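 *
 * On the vkernel the wait is a simple spin which calls pthread_yield()
 * until the owning cpu releases CPULOCK_EXCL.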
3330 */ 3331 void 3332 pmap_interlock_wait (struct vmspace *vm) 3333 { 3334 pmap_t pmap = vmspace_pmap(vm); 3335 3336 if (pmap->pm_active_lock & CPULOCK_EXCL) { 3337 crit_enter(); 3338 while (pmap->pm_active_lock & CPULOCK_EXCL) { 3339 cpu_ccfence(); 3340 pthread_yield(); 3341 } 3342 crit_exit(); 3343 } 3344 } 3345 3346 vm_offset_t 3347 pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) 3348 { 3349 3350 if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { 3351 return addr; 3352 } 3353 3354 addr = roundup2(addr, NBPDR); 3355 return addr; 3356 } 3357 3358 /* 3359 * Used by kmalloc/kfree, page already exists at va 3360 */ 3361 vm_page_t 3362 pmap_kvtom(vm_offset_t va) 3363 { 3364 vpte_t *ptep; 3365 3366 KKASSERT(va >= KvaStart && va < KvaEnd); 3367 ptep = vtopte(va); 3368 return(PHYS_TO_VM_PAGE(*ptep & PG_FRAME)); 3369 } 3370 3371 void 3372 pmap_object_init(vm_object_t object) 3373 { 3374 /* empty */ 3375 } 3376 3377 void 3378 pmap_object_free(vm_object_t object) 3379 { 3380 /* empty */ 3381 } 3382 3383 void 3384 pmap_pgscan(struct pmap_pgscan_info *pginfo) 3385 { 3386 pmap_t pmap = pginfo->pmap; 3387 vm_offset_t sva = pginfo->beg_addr; 3388 vm_offset_t eva = pginfo->end_addr; 3389 vm_offset_t va_next; 3390 pml4_entry_t *pml4e; 3391 pdp_entry_t *pdpe; 3392 pd_entry_t ptpaddr, *pde; 3393 pt_entry_t *pte; 3394 vm_page_t pt_m; 3395 int stop = 0; 3396 3397 vm_object_hold(pmap->pm_pteobj); 3398 3399 for (; sva < eva; sva = va_next) { 3400 if (stop) 3401 break; 3402 3403 pml4e = pmap_pml4e(pmap, sva); 3404 if ((*pml4e & VPTE_V) == 0) { 3405 va_next = (sva + NBPML4) & ~PML4MASK; 3406 if (va_next < sva) 3407 va_next = eva; 3408 continue; 3409 } 3410 3411 pdpe = pmap_pml4e_to_pdpe(pml4e, sva); 3412 if ((*pdpe & VPTE_V) == 0) { 3413 va_next = (sva + NBPDP) & ~PDPMASK; 3414 if (va_next < sva) 3415 va_next = eva; 3416 continue; 3417 } 3418 3419 va_next = (sva + NBPDR) & ~PDRMASK; 3420 if (va_next < sva) 3421 va_next = eva; 3422 3423 pde = pmap_pdpe_to_pde(pdpe, sva); 3424 ptpaddr = *pde; 3425 3426 #if 0 3427 /* 3428 * Check for large page (ignore). 3429 */ 3430 if ((ptpaddr & VPTE_PS) != 0) { 3431 #if 0 3432 pmap_clean_pde(pde, pmap, sva); 3433 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 3434 #endif 3435 continue; 3436 } 3437 #endif 3438 3439 /* 3440 * Weed out invalid mappings. Note: we assume that the page 3441 * directory table is always allocated, and in kernel virtual. 3442 */ 3443 if (ptpaddr == 0) 3444 continue; 3445 3446 if (va_next > eva) 3447 va_next = eva; 3448 3449 pt_m = pmap_hold_pt_page(pde, sva); 3450 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, 3451 sva += PAGE_SIZE) { 3452 vm_page_t m; 3453 3454 if (stop) 3455 break; 3456 if ((*pte & VPTE_MANAGED) == 0) 3457 continue; 3458 3459 m = PHYS_TO_VM_PAGE(*pte & VPTE_FRAME); 3460 if (vm_page_busy_try(m, TRUE) == 0) { 3461 if (pginfo->callback(pginfo, sva, m) < 0) 3462 stop = 1; 3463 } 3464 } 3465 vm_page_unhold(pt_m); 3466 } 3467 vm_object_drop(pmap->pm_pteobj); 3468 } 3469