/*
 * (MPSAFE)
 *
 * Copyright (c) 1991 Regents of the University of California.
 * Copyright (c) 1994 John S. Dyson
 * Copyright (c) 1994 David Greenman
 * Copyright (c) 2003 Peter Wemm
 * Copyright (c) 2005-2008 Alan L. Cox <alc@cs.rice.edu>
 * Copyright (c) 2008, 2009 The DragonFly Project.
 * Copyright (c) 2008, 2009 Jordan Gordeev.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from:   @(#)pmap.c	7.7 (Berkeley)	5/12/91
 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $
 */

/*
 * Manages physical address maps.
 */

#if JG
#include "opt_pmap.h"
#endif
#include "opt_msgbuf.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/msgbuf.h>
#include <sys/vmmeter.h>
#include <sys/mman.h>
#include <sys/vmspace.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_zone.h>

#include <sys/user.h>
#include <sys/thread2.h>
#include <sys/sysref2.h>
#include <sys/spinlock2.h>
#include <vm/vm_page2.h>

#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <machine/smp.h>
#include <machine/globaldata.h>
#include <machine/pmap.h>
#include <machine/pmap_inval.h>

#include <ddb/ddb.h>

#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <pthread.h>

#define PMAP_KEEP_PDIRS
#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC 1000
#endif

#if defined(DIAGNOSTIC)
#define PMAP_DIAGNOSTIC
#endif

#define MINPV 2048

#if !defined(PMAP_DIAGNOSTIC)
#define PMAP_INLINE __inline
#else
#define PMAP_INLINE
#endif

/*
 * Get PDEs and PTEs for user/kernel address space
 */
static pd_entry_t *pmap_pde(pmap_t pmap, vm_offset_t va);
#define pdir_pde(m, v)	(m[(vm_offset_t)(v) >> PDRSHIFT])

#define pmap_pde_v(pte)	((*(pd_entry_t *)pte & VPTE_V) != 0)
#define pmap_pte_w(pte)	((*(pt_entry_t *)pte & VPTE_WIRED) != 0)
#define pmap_pte_m(pte)	((*(pt_entry_t *)pte & VPTE_M) != 0)
#define pmap_pte_u(pte)	((*(pt_entry_t *)pte & VPTE_A) != 0)
#define pmap_pte_v(pte)	((*(pt_entry_t *)pte & VPTE_V) != 0)

/*
 * Given a map and a machine independent protection code,
 * convert to a vax protection code.
 */
#define pte_prot(m, p)	\
	(protection_codes[p & (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)])
static int protection_codes[8];

struct pmap kernel_pmap;
static TAILQ_HEAD(,pmap) pmap_list = TAILQ_HEAD_INITIALIZER(pmap_list);

static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */

static vm_object_t kptobj;

static int nkpt;

static uint64_t	KPDphys;	/* phys addr of kernel level 2 */
uint64_t	KPDPphys;	/* phys addr of kernel level 3 */
uint64_t	KPML4phys;	/* phys addr of kernel level 4 */

extern int vmm_enabled;
extern void *vkernel_stack;

/*
 * Data for the pv entry allocation mechanism
 */
static vm_zone_t pvzone;
static struct vm_zone pvzone_store;
static struct vm_object pvzone_obj;
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
static int pmap_pagedaemon_waken = 0;
static struct pv_entry *pvinit;

/*
 * All those kernel PT submaps that BSD is so fond of
 */
pt_entry_t *CMAP1 = NULL, *ptmmap;
caddr_t CADDR1 = NULL;
static pt_entry_t *msgbufmap;

uint64_t KPTphys;

static PMAP_INLINE void	free_pv_entry (pv_entry_t pv);
static pv_entry_t get_pv_entry (void);
static void	i386_protection_init (void);
static __inline void	pmap_clearbit (vm_page_t m, int bit);

static void	pmap_remove_all (vm_page_t m);
static int	pmap_remove_pte (struct pmap *pmap, pt_entry_t *ptq,
				vm_offset_t sva);
static void	pmap_remove_page (struct pmap *pmap, vm_offset_t va);
static int	pmap_remove_entry (struct pmap *pmap, vm_page_t m,
				vm_offset_t va);
static boolean_t pmap_testbit (vm_page_t m, int bit);
static void	pmap_insert_entry (pmap_t pmap, vm_offset_t va,
				vm_page_t mpte, vm_page_t m);

static vm_page_t pmap_allocpte (pmap_t pmap, vm_offset_t va);

static int pmap_release_free_page (pmap_t pmap, vm_page_t p);
static vm_page_t _pmap_allocpte (pmap_t pmap, vm_pindex_t ptepindex);
#if JGPMAP32
static pt_entry_t *pmap_pte_quick (pmap_t pmap, vm_offset_t va);
#endif
static vm_page_t pmap_page_lookup (vm_object_t object, vm_pindex_t pindex);
static int pmap_unuse_pt (pmap_t, vm_offset_t, vm_page_t);

/*
 * pmap_pte_quick:
 *
 *	Super fast pmap_pte routine best used when scanning the pv lists.
 *	This eliminates many coarse-grained invltlb calls.  Note that many of
 *	the pv list scans are across different pmaps and it is very wasteful
 *	to do an entire invltlb when checking a single mapping.
 *
 *	Should only be called while in a critical section.
207 */ 208 #if JGPMAP32 209 static __inline pt_entry_t *pmap_pte(pmap_t pmap, vm_offset_t va); 210 211 static pt_entry_t * 212 pmap_pte_quick(pmap_t pmap, vm_offset_t va) 213 { 214 return pmap_pte(pmap, va); 215 } 216 #endif 217 218 /* Return a non-clipped PD index for a given VA */ 219 static __inline vm_pindex_t 220 pmap_pde_pindex(vm_offset_t va) 221 { 222 return va >> PDRSHIFT; 223 } 224 225 /* Return various clipped indexes for a given VA */ 226 static __inline vm_pindex_t 227 pmap_pte_index(vm_offset_t va) 228 { 229 230 return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1)); 231 } 232 233 static __inline vm_pindex_t 234 pmap_pde_index(vm_offset_t va) 235 { 236 237 return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1)); 238 } 239 240 static __inline vm_pindex_t 241 pmap_pdpe_index(vm_offset_t va) 242 { 243 244 return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1)); 245 } 246 247 static __inline vm_pindex_t 248 pmap_pml4e_index(vm_offset_t va) 249 { 250 251 return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1)); 252 } 253 254 /* Return a pointer to the PML4 slot that corresponds to a VA */ 255 static __inline pml4_entry_t * 256 pmap_pml4e(pmap_t pmap, vm_offset_t va) 257 { 258 259 return (&pmap->pm_pml4[pmap_pml4e_index(va)]); 260 } 261 262 /* Return a pointer to the PDP slot that corresponds to a VA */ 263 static __inline pdp_entry_t * 264 pmap_pml4e_to_pdpe(pml4_entry_t *pml4e, vm_offset_t va) 265 { 266 pdp_entry_t *pdpe; 267 268 pdpe = (pdp_entry_t *)PHYS_TO_DMAP(*pml4e & VPTE_FRAME); 269 return (&pdpe[pmap_pdpe_index(va)]); 270 } 271 272 /* Return a pointer to the PDP slot that corresponds to a VA */ 273 static __inline pdp_entry_t * 274 pmap_pdpe(pmap_t pmap, vm_offset_t va) 275 { 276 pml4_entry_t *pml4e; 277 278 pml4e = pmap_pml4e(pmap, va); 279 if ((*pml4e & VPTE_V) == 0) 280 return NULL; 281 return (pmap_pml4e_to_pdpe(pml4e, va)); 282 } 283 284 /* Return a pointer to the PD slot that corresponds to a VA */ 285 static __inline pd_entry_t * 286 pmap_pdpe_to_pde(pdp_entry_t *pdpe, vm_offset_t va) 287 { 288 pd_entry_t *pde; 289 290 pde = (pd_entry_t *)PHYS_TO_DMAP(*pdpe & VPTE_FRAME); 291 return (&pde[pmap_pde_index(va)]); 292 } 293 294 /* Return a pointer to the PD slot that corresponds to a VA */ 295 static __inline pd_entry_t * 296 pmap_pde(pmap_t pmap, vm_offset_t va) 297 { 298 pdp_entry_t *pdpe; 299 300 pdpe = pmap_pdpe(pmap, va); 301 if (pdpe == NULL || (*pdpe & VPTE_V) == 0) 302 return NULL; 303 return (pmap_pdpe_to_pde(pdpe, va)); 304 } 305 306 /* Return a pointer to the PT slot that corresponds to a VA */ 307 static __inline pt_entry_t * 308 pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va) 309 { 310 pt_entry_t *pte; 311 312 pte = (pt_entry_t *)PHYS_TO_DMAP(*pde & VPTE_FRAME); 313 return (&pte[pmap_pte_index(va)]); 314 } 315 316 /* Return a pointer to the PT slot that corresponds to a VA */ 317 static __inline pt_entry_t * 318 pmap_pte(pmap_t pmap, vm_offset_t va) 319 { 320 pd_entry_t *pde; 321 322 pde = pmap_pde(pmap, va); 323 if (pde == NULL || (*pde & VPTE_V) == 0) 324 return NULL; 325 if ((*pde & VPTE_PS) != 0) /* compat with i386 pmap_pte() */ 326 return ((pt_entry_t *)pde); 327 return (pmap_pde_to_pte(pde, va)); 328 } 329 330 331 #if JGV 332 PMAP_INLINE pt_entry_t * 333 vtopte(vm_offset_t va) 334 { 335 uint64_t mask = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + 336 NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1); 337 338 return (PTmap + ((va >> PAGE_SHIFT) & mask)); 339 } 340 341 static __inline pd_entry_t * 342 vtopde(vm_offset_t va) 343 { 344 uint64_t mask = ((1ul << 
				  NPML4EPGSHIFT)) - 1);

	return (PDmap + ((va >> PDRSHIFT) & mask));
}
#else
static PMAP_INLINE pt_entry_t *
vtopte(vm_offset_t va)
{
	pt_entry_t *x;

	x = pmap_pte(&kernel_pmap, va);
	assert(x != NULL);
	return x;
}

static __inline pd_entry_t *
vtopde(vm_offset_t va)
{
	pd_entry_t *x;

	x = pmap_pde(&kernel_pmap, va);
	assert(x != NULL);
	return x;
}
#endif

static uint64_t
allocpages(vm_paddr_t *firstaddr, int n)
{
	uint64_t ret;

	ret = *firstaddr;
#if JGV
	bzero((void *)ret, n * PAGE_SIZE);
#endif
	*firstaddr += n * PAGE_SIZE;
	return (ret);
}

static void
create_dmap_vmm(vm_paddr_t *firstaddr)
{
	void *stack_addr;
	int pml4_stack_index;
	int pdp_stack_index;
	int pd_stack_index;
	long i, j;
	int regs[4];
	int amd_feature;

	uint64_t KPDP_DMAP_phys = allocpages(firstaddr, NDMPML4E);
	uint64_t KPDP_VSTACK_phys = allocpages(firstaddr, 1);
	uint64_t KPD_VSTACK_phys = allocpages(firstaddr, 1);

	pml4_entry_t *KPML4virt = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys);
	pdp_entry_t *KPDP_DMAP_virt =
		(pdp_entry_t *)PHYS_TO_DMAP(KPDP_DMAP_phys);
	pdp_entry_t *KPDP_VSTACK_virt =
		(pdp_entry_t *)PHYS_TO_DMAP(KPDP_VSTACK_phys);
	pd_entry_t *KPD_VSTACK_virt =
		(pd_entry_t *)PHYS_TO_DMAP(KPD_VSTACK_phys);

	bzero(KPDP_DMAP_virt, NDMPML4E * PAGE_SIZE);
	bzero(KPDP_VSTACK_virt, 1 * PAGE_SIZE);
	bzero(KPD_VSTACK_virt, 1 * PAGE_SIZE);

	do_cpuid(0x80000001, regs);
	amd_feature = regs[3];

	/* Build the mappings for the first 512GB */
	if (amd_feature & AMDID_PAGE1GB) {
		/* In pages of 1GB, if supported */
		for (i = 0; i < NPDPEPG; i++) {
			KPDP_DMAP_virt[i] = ((uint64_t)i << PDPSHIFT);
			KPDP_DMAP_virt[i] |= VPTE_RW | VPTE_V | VPTE_PS |
					     VPTE_U;
		}
	} else {
		/* In pages of 2MB otherwise */
		for (i = 0; i < NPDPEPG; i++) {
			uint64_t KPD_DMAP_phys = allocpages(firstaddr, 1);
			pd_entry_t *KPD_DMAP_virt =
				(pd_entry_t *)PHYS_TO_DMAP(KPD_DMAP_phys);

			bzero(KPD_DMAP_virt, PAGE_SIZE);

			KPDP_DMAP_virt[i] = KPD_DMAP_phys;
			KPDP_DMAP_virt[i] |= VPTE_RW | VPTE_V | VPTE_U;

			/* For each PD, we have to allocate NPTEPG PTs */
			for (j = 0; j < NPTEPG; j++) {
				KPD_DMAP_virt[j] = (i << PDPSHIFT) |
						   (j << PDRSHIFT);
				KPD_DMAP_virt[j] |= VPTE_RW | VPTE_V |
						    VPTE_PS | VPTE_U;
			}
		}
	}

	/* DMAP for the first 512GB */
	KPML4virt[0] = KPDP_DMAP_phys;
	KPML4virt[0] |= VPTE_RW | VPTE_V | VPTE_U;

	/* create a 2MB map of the new stack */
	pml4_stack_index = (uint64_t)&stack_addr >> PML4SHIFT;
	KPML4virt[pml4_stack_index] = KPDP_VSTACK_phys;
	KPML4virt[pml4_stack_index] |= VPTE_RW | VPTE_V | VPTE_U;

	pdp_stack_index = ((uint64_t)&stack_addr & PML4MASK) >> PDPSHIFT;
	KPDP_VSTACK_virt[pdp_stack_index] = KPD_VSTACK_phys;
	KPDP_VSTACK_virt[pdp_stack_index] |= VPTE_RW | VPTE_V | VPTE_U;

	pd_stack_index = ((uint64_t)&stack_addr & PDPMASK) >> PDRSHIFT;
	KPD_VSTACK_virt[pd_stack_index] = (uint64_t)vkernel_stack;
	KPD_VSTACK_virt[pd_stack_index] |= VPTE_RW | VPTE_V | VPTE_U | VPTE_PS;
}

static void
create_pagetables(vm_paddr_t *firstaddr, int64_t ptov_offset)
{
	int i;
	pml4_entry_t *KPML4virt;
	pdp_entry_t *KPDPvirt;
	pd_entry_t *KPDvirt;
	pt_entry_t *KPTvirt;
	int kpml4i = pmap_pml4e_index(ptov_offset);
	int kpdpi = pmap_pdpe_index(ptov_offset);
	int kpdi = pmap_pde_index(ptov_offset);

	/*
	 * Calculate NKPT - number of kernel page tables.  We have to
	 * accommodate preallocation of the vm_page_array, dump bitmap,
	 * MSGBUF_SIZE, and other stuff.  Be generous.
	 *
	 * Maxmem is in pages.
	 */
	nkpt = (Maxmem * (sizeof(struct vm_page) * 2) + MSGBUF_SIZE) / NBPDR;

	/*
	 * Allocate pages
	 */
	KPML4phys = allocpages(firstaddr, 1);
	KPDPphys = allocpages(firstaddr, NKPML4E);
	KPDphys = allocpages(firstaddr, NKPDPE);
	KPTphys = allocpages(firstaddr, nkpt);

	KPML4virt = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys);
	KPDPvirt = (pdp_entry_t *)PHYS_TO_DMAP(KPDPphys);
	KPDvirt = (pd_entry_t *)PHYS_TO_DMAP(KPDphys);
	KPTvirt = (pt_entry_t *)PHYS_TO_DMAP(KPTphys);

	bzero(KPML4virt, 1 * PAGE_SIZE);
	bzero(KPDPvirt, NKPML4E * PAGE_SIZE);
	bzero(KPDvirt, NKPDPE * PAGE_SIZE);
	bzero(KPTvirt, nkpt * PAGE_SIZE);

	/* Now map the page tables at their location within PTmap */
	for (i = 0; i < nkpt; i++) {
		KPDvirt[i + kpdi] = KPTphys + (i << PAGE_SHIFT);
		KPDvirt[i + kpdi] |= VPTE_RW | VPTE_V | VPTE_U;
	}

	/* And connect up the PD to the PDP */
	for (i = 0; i < NKPDPE; i++) {
		KPDPvirt[i + kpdpi] = KPDphys + (i << PAGE_SHIFT);
		KPDPvirt[i + kpdpi] |= VPTE_RW | VPTE_V | VPTE_U;
	}

	/* And recursively map PML4 to itself in order to get PTmap */
	KPML4virt[PML4PML4I] = KPML4phys;
	KPML4virt[PML4PML4I] |= VPTE_RW | VPTE_V | VPTE_U;

	/* Connect the KVA slot up to the PML4 */
	KPML4virt[kpml4i] = KPDPphys;
	KPML4virt[kpml4i] |= VPTE_RW | VPTE_V | VPTE_U;
}

/*
 * Typically used to initialize a fictitious page by vm/device_pager.c
 */
void
pmap_page_init(struct vm_page *m)
{
	vm_page_init(m);
	TAILQ_INIT(&m->md.pv_list);
}

/*
 * Bootstrap the system enough to run with virtual memory.
 *
 * On the i386 this is called after mapping has already been enabled
 * and just syncs the pmap module with what has already been done.
 * [We can't call it easily with mapping off since the kernel is not
 * mapped with PA == VA, hence we would have to relocate every address
 * from the linked base (virtual) address "KERNBASE" to the actual
 * (physical) address starting relative to 0]
 */
void
pmap_bootstrap(vm_paddr_t *firstaddr, int64_t ptov_offset)
{
	vm_offset_t va;
	pt_entry_t *pte;

	/*
	 * Create an initial set of page tables to run the kernel in.
	 */
	create_pagetables(firstaddr, ptov_offset);

	/* Create the DMAP for the VMM */
	if (vmm_enabled) {
		create_dmap_vmm(firstaddr);
	}

	virtual_start = KvaStart;
	virtual_end = KvaEnd;

	/*
	 * Initialize protection array.
	 */
	i386_protection_init();

	/*
	 * The kernel's pmap is statically allocated so we don't have to use
	 * pmap_create, which is unlikely to work correctly at this part of
	 * the boot sequence (XXX and which no longer exists).
	 *
	 * The kernel_pmap's pm_pteobj is used only for locking and not
	 * for mmu pages.
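	 *
	 * A rough summary of the code below: point the statically
	 * allocated kernel_pmap at the PML4 page built by
	 * create_pagetables(), mark it active on all cpus so it is never
	 * deactivated, and initialize its pv lists, token, and spinlock.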
563 */ 564 kernel_pmap.pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); 565 kernel_pmap.pm_count = 1; 566 /* don't allow deactivation */ 567 CPUMASK_ASSALLONES(kernel_pmap.pm_active); 568 kernel_pmap.pm_pteobj = NULL; /* see pmap_init */ 569 TAILQ_INIT(&kernel_pmap.pm_pvlist); 570 TAILQ_INIT(&kernel_pmap.pm_pvlist_free); 571 lwkt_token_init(&kernel_pmap.pm_token, "kpmap_tok"); 572 spin_init(&kernel_pmap.pm_spin, "pmapbootstrap"); 573 574 /* 575 * Reserve some special page table entries/VA space for temporary 576 * mapping of pages. 577 */ 578 #define SYSMAP(c, p, v, n) \ 579 v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); 580 581 va = virtual_start; 582 pte = pmap_pte(&kernel_pmap, va); 583 /* 584 * CMAP1/CMAP2 are used for zeroing and copying pages. 585 */ 586 SYSMAP(caddr_t, CMAP1, CADDR1, 1) 587 588 #if JGV 589 /* 590 * Crashdump maps. 591 */ 592 SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS); 593 #endif 594 595 /* 596 * ptvmmap is used for reading arbitrary physical pages via 597 * /dev/mem. 598 */ 599 SYSMAP(caddr_t, ptmmap, ptvmmap, 1) 600 601 /* 602 * msgbufp is used to map the system message buffer. 603 * XXX msgbufmap is not used. 604 */ 605 SYSMAP(struct msgbuf *, msgbufmap, msgbufp, 606 atop(round_page(MSGBUF_SIZE))) 607 608 virtual_start = va; 609 610 *CMAP1 = 0; 611 /* Not ready to do an invltlb yet for VMM*/ 612 if (!vmm_enabled) 613 cpu_invltlb(); 614 615 } 616 617 /* 618 * Initialize the pmap module. 619 * Called by vm_init, to initialize any structures that the pmap 620 * system needs to map virtual memory. 621 * pmap_init has been enhanced to support in a fairly consistant 622 * way, discontiguous physical memory. 623 */ 624 void 625 pmap_init(void) 626 { 627 int i; 628 int initial_pvs; 629 630 /* 631 * object for kernel page table pages 632 */ 633 /* JG I think the number can be arbitrary */ 634 kptobj = vm_object_allocate(OBJT_DEFAULT, 5); 635 kernel_pmap.pm_pteobj = kptobj; 636 637 /* 638 * Allocate memory for random pmap data structures. Includes the 639 * pv_head_table. 640 */ 641 for(i = 0; i < vm_page_array_size; i++) { 642 vm_page_t m; 643 644 m = &vm_page_array[i]; 645 TAILQ_INIT(&m->md.pv_list); 646 m->md.pv_list_count = 0; 647 } 648 649 /* 650 * init the pv free list 651 */ 652 initial_pvs = vm_page_array_size; 653 if (initial_pvs < MINPV) 654 initial_pvs = MINPV; 655 pvzone = &pvzone_store; 656 pvinit = (struct pv_entry *) kmem_alloc(&kernel_map, 657 initial_pvs * sizeof (struct pv_entry)); 658 zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit, 659 initial_pvs); 660 661 /* 662 * Now it is safe to enable pv_table recording. 663 */ 664 pmap_initialized = TRUE; 665 } 666 667 /* 668 * Initialize the address space (zone) for the pv_entries. Set a 669 * high water mark so that the system can recover from excessive 670 * numbers of pv entries. 671 */ 672 void 673 pmap_init2(void) 674 { 675 int shpgperproc = PMAP_SHPGPERPROC; 676 677 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); 678 pv_entry_max = shpgperproc * maxproc + vm_page_array_size; 679 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); 680 pv_entry_high_water = 9 * (pv_entry_max / 10); 681 zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1); 682 } 683 684 685 /*************************************************** 686 * Low level helper routines..... 687 ***************************************************/ 688 689 /* 690 * The modification bit is not tracked for any pages in this range. 
 * such pages in this map should always use pmap_k*() functions and not
 * be managed anyhow.
 *
 * XXX User and kernel address spaces are independent for virtual kernels,
 * this function only applies to the kernel pmap.
 */
static int
pmap_track_modified(pmap_t pmap, vm_offset_t va)
{
	if (pmap != &kernel_pmap)
		return 1;
	if ((va < clean_sva) || (va >= clean_eva))
		return 1;
	else
		return 0;
}

/*
 * Extract the physical page address associated with the map/VA pair.
 *
 * No requirements.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t rtval;
	pt_entry_t *pte;
	pd_entry_t pde, *pdep;

	lwkt_gettoken(&vm_token);
	rtval = 0;
	pdep = pmap_pde(pmap, va);
	if (pdep != NULL) {
		pde = *pdep;
		if (pde) {
			if ((pde & VPTE_PS) != 0) {
				/* JGV */
				rtval = (pde & PG_PS_FRAME) | (va & PDRMASK);
			} else {
				pte = pmap_pde_to_pte(pdep, va);
				rtval = (*pte & VPTE_FRAME) | (va & PAGE_MASK);
			}
		}
	}
	lwkt_reltoken(&vm_token);
	return rtval;
}

/*
 * Similar to extract but checks protections, SMP-friendly short-cut for
 * vm_fault_page[_quick]().
 */
vm_page_t
pmap_fault_page_quick(pmap_t pmap __unused, vm_offset_t vaddr __unused,
		      vm_prot_t prot __unused)
{
	return(NULL);
}

/*
 * Routine:	pmap_kextract
 * Function:
 *	Extract the physical page address associated with the given
 *	kernel virtual address.
 */
vm_paddr_t
pmap_kextract(vm_offset_t va)
{
	pd_entry_t pde;
	vm_paddr_t pa;

	KKASSERT(va >= KvaStart && va < KvaEnd);

	/*
	 * The DMAP region is not included in [KvaStart, KvaEnd)
	 */
#if 0
	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
		pa = DMAP_TO_PHYS(va);
	} else {
#endif
		pde = *vtopde(va);
		if (pde & VPTE_PS) {
			/* JGV */
			pa = (pde & PG_PS_FRAME) | (va & PDRMASK);
		} else {
			/*
			 * Beware of a concurrent promotion that changes the
			 * PDE at this point!  For example, vtopte() must not
			 * be used to access the PTE because it would use the
			 * new PDE.  It is, however, safe to use the old PDE
			 * because the page table page is preserved by the
			 * promotion.
			 */
			pa = *pmap_pde_to_pte(&pde, va);
			pa = (pa & VPTE_FRAME) | (va & PAGE_MASK);
		}
#if 0
	}
#endif
	return pa;
}

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

/*
 * Enter a mapping into kernel_pmap.  Mappings created in this fashion
 * are not managed.  Mappings must be immediately accessible on all cpus.
 *
 * Call pmap_inval_pte() to invalidate the virtual pte and clean out the
 * real pmap and handle related races before storing the new vpte.
 */
void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{
	pt_entry_t *pte;
	pt_entry_t npte;

	KKASSERT(va >= KvaStart && va < KvaEnd);
	npte = pa | VPTE_RW | VPTE_V | VPTE_U;
	pte = vtopte(va);
	if (*pte & VPTE_V)
		pmap_inval_pte(pte, &kernel_pmap, va);
	*pte = npte;
}

/*
 * Enter an unmanaged KVA mapping for the private use of the current
 * cpu only.
 *
 * It is illegal for the mapping to be accessed by other cpus without
 * proper invalidation.
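 *
 * A minimal usage sketch (assuming the page 'm' was allocated and the
 * KVA 'va' reserved elsewhere):
 *
 *	pmap_kenter_quick(va, VM_PAGE_TO_PHYS(m));
 *	... use the mapping on this cpu only ...
 *	pmap_kremove_quick(va);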
825 */ 826 int 827 pmap_kenter_quick(vm_offset_t va, vm_paddr_t pa) 828 { 829 pt_entry_t *ptep; 830 pt_entry_t npte; 831 int res; 832 833 KKASSERT(va >= KvaStart && va < KvaEnd); 834 835 npte = (vpte_t)pa | VPTE_RW | VPTE_V | VPTE_U; 836 ptep = vtopte(va); 837 #if 1 838 res = 1; 839 #else 840 /* FUTURE */ 841 res = (*ptep != 0); 842 #endif 843 844 if (*ptep & VPTE_V) 845 pmap_inval_pte_quick(ptep, &kernel_pmap, va); 846 *ptep = npte; 847 848 return res; 849 } 850 851 int 852 pmap_kenter_noinval(vm_offset_t va, vm_paddr_t pa) 853 { 854 pt_entry_t *ptep; 855 pt_entry_t npte; 856 int res; 857 858 KKASSERT(va >= KvaStart && va < KvaEnd); 859 860 npte = (vpte_t)pa | VPTE_RW | VPTE_V | VPTE_U; 861 ptep = vtopte(va); 862 #if 1 863 res = 1; 864 #else 865 /* FUTURE */ 866 res = (*ptep != 0); 867 #endif 868 869 *ptep = npte; 870 871 return res; 872 } 873 874 /* 875 * Remove an unmanaged mapping created with pmap_kenter*(). 876 */ 877 void 878 pmap_kremove(vm_offset_t va) 879 { 880 pt_entry_t *pte; 881 882 KKASSERT(va >= KvaStart && va < KvaEnd); 883 884 pte = vtopte(va); 885 if (*pte & VPTE_V) 886 pmap_inval_pte(pte, &kernel_pmap, va); 887 *pte = 0; 888 } 889 890 /* 891 * Remove an unmanaged mapping created with pmap_kenter*() but synchronize 892 * only with this cpu. 893 * 894 * Unfortunately because we optimize new entries by testing VPTE_V later 895 * on, we actually still have to synchronize with all the cpus. XXX maybe 896 * store a junk value and test against 0 in the other places instead? 897 */ 898 void 899 pmap_kremove_quick(vm_offset_t va) 900 { 901 pt_entry_t *pte; 902 903 KKASSERT(va >= KvaStart && va < KvaEnd); 904 905 pte = vtopte(va); 906 if (*pte & VPTE_V) 907 pmap_inval_pte(pte, &kernel_pmap, va); /* NOT _quick */ 908 *pte = 0; 909 } 910 911 void 912 pmap_kremove_noinval(vm_offset_t va) 913 { 914 pt_entry_t *pte; 915 916 KKASSERT(va >= KvaStart && va < KvaEnd); 917 918 pte = vtopte(va); 919 *pte = 0; 920 } 921 922 /* 923 * Used to map a range of physical addresses into kernel 924 * virtual address space. 925 * 926 * For now, VM is already on, we only need to map the 927 * specified memory. 928 */ 929 vm_offset_t 930 pmap_map(vm_offset_t *virtp, vm_paddr_t start, vm_paddr_t end, int prot) 931 { 932 return PHYS_TO_DMAP(start); 933 } 934 935 /* 936 * Map a set of unmanaged VM pages into KVM. 937 */ 938 void 939 pmap_qenter(vm_offset_t va, vm_page_t *m, int count) 940 { 941 vm_offset_t end_va; 942 943 end_va = va + count * PAGE_SIZE; 944 KKASSERT(va >= KvaStart && end_va < KvaEnd); 945 946 while (va < end_va) { 947 pt_entry_t *pte; 948 949 pte = vtopte(va); 950 pmap_inval_pte(pte, &kernel_pmap, va); 951 *pte = VM_PAGE_TO_PHYS(*m) | VPTE_RW | VPTE_V | VPTE_U; 952 va += PAGE_SIZE; 953 m++; 954 } 955 } 956 957 /* 958 * Undo the effects of pmap_qenter*(). 
959 */ 960 void 961 pmap_qremove(vm_offset_t va, int count) 962 { 963 vm_offset_t end_va; 964 965 end_va = va + count * PAGE_SIZE; 966 KKASSERT(va >= KvaStart && end_va < KvaEnd); 967 968 while (va < end_va) { 969 pt_entry_t *pte; 970 971 pte = vtopte(va); 972 atomic_swap_long(pte, 0); 973 pmap_inval_pte(pte, &kernel_pmap, va); 974 va += PAGE_SIZE; 975 } 976 } 977 978 void 979 pmap_qremove_quick(vm_offset_t va, int count) 980 { 981 vm_offset_t end_va; 982 983 end_va = va + count * PAGE_SIZE; 984 KKASSERT(va >= KvaStart && end_va < KvaEnd); 985 986 while (va < end_va) { 987 pt_entry_t *pte; 988 989 pte = vtopte(va); 990 atomic_swap_long(pte, 0); 991 cpu_invlpg((void *)va); 992 va += PAGE_SIZE; 993 } 994 } 995 996 void 997 pmap_qremove_noinval(vm_offset_t va, int count) 998 { 999 vm_offset_t end_va; 1000 1001 end_va = va + count * PAGE_SIZE; 1002 KKASSERT(va >= KvaStart && end_va < KvaEnd); 1003 1004 while (va < end_va) { 1005 pt_entry_t *pte; 1006 1007 pte = vtopte(va); 1008 atomic_swap_long(pte, 0); 1009 va += PAGE_SIZE; 1010 } 1011 } 1012 1013 /* 1014 * This routine works like vm_page_lookup() but also blocks as long as the 1015 * page is busy. This routine does not busy the page it returns. 1016 * 1017 * Unless the caller is managing objects whos pages are in a known state, 1018 * the call should be made with a critical section held so the page's object 1019 * association remains valid on return. 1020 */ 1021 static vm_page_t 1022 pmap_page_lookup(vm_object_t object, vm_pindex_t pindex) 1023 { 1024 vm_page_t m; 1025 1026 ASSERT_LWKT_TOKEN_HELD(vm_object_token(object)); 1027 m = vm_page_lookup_busy_wait(object, pindex, FALSE, "pplookp"); 1028 1029 return(m); 1030 } 1031 1032 /* 1033 * Create a new thread and optionally associate it with a (new) process. 1034 * NOTE! the new thread's cpu may not equal the current cpu. 1035 */ 1036 void 1037 pmap_init_thread(thread_t td) 1038 { 1039 /* enforce pcb placement */ 1040 td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_size) - 1; 1041 td->td_savefpu = &td->td_pcb->pcb_save; 1042 td->td_sp = (char *)td->td_pcb - 16; /* JG is -16 needed on x86_64? */ 1043 } 1044 1045 /* 1046 * This routine directly affects the fork perf for a process. 1047 */ 1048 void 1049 pmap_init_proc(struct proc *p) 1050 { 1051 } 1052 1053 /*************************************************** 1054 * Page table page management routines..... 1055 ***************************************************/ 1056 1057 static __inline int pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, 1058 vm_page_t m); 1059 1060 /* 1061 * This routine unholds page table pages, and if the hold count 1062 * drops to zero, then it decrements the wire count. 1063 * 1064 * We must recheck that this is the last hold reference after busy-sleeping 1065 * on the page. 1066 */ 1067 static int 1068 _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m) 1069 { 1070 vm_page_busy_wait(m, FALSE, "pmuwpt"); 1071 KASSERT(m->queue == PQ_NONE, 1072 ("_pmap_unwire_pte_hold: %p->queue != PQ_NONE", m)); 1073 1074 if (m->hold_count == 1) { 1075 /* 1076 * Unmap the page table page. 
1077 */ 1078 //abort(); /* JG */ 1079 /* pmap_inval_add(info, pmap, -1); */ 1080 1081 if (m->pindex >= (NUPDE + NUPDPE)) { 1082 /* PDP page */ 1083 pml4_entry_t *pml4; 1084 pml4 = pmap_pml4e(pmap, va); 1085 *pml4 = 0; 1086 } else if (m->pindex >= NUPDE) { 1087 /* PD page */ 1088 pdp_entry_t *pdp; 1089 pdp = pmap_pdpe(pmap, va); 1090 *pdp = 0; 1091 } else { 1092 /* PT page */ 1093 pd_entry_t *pd; 1094 pd = pmap_pde(pmap, va); 1095 *pd = 0; 1096 } 1097 1098 KKASSERT(pmap->pm_stats.resident_count > 0); 1099 --pmap->pm_stats.resident_count; 1100 1101 if (pmap->pm_ptphint == m) 1102 pmap->pm_ptphint = NULL; 1103 1104 if (m->pindex < NUPDE) { 1105 /* We just released a PT, unhold the matching PD */ 1106 vm_page_t pdpg; 1107 1108 pdpg = PHYS_TO_VM_PAGE(*pmap_pdpe(pmap, va) & VPTE_FRAME); 1109 pmap_unwire_pte_hold(pmap, va, pdpg); 1110 } 1111 if (m->pindex >= NUPDE && m->pindex < (NUPDE + NUPDPE)) { 1112 /* We just released a PD, unhold the matching PDP */ 1113 vm_page_t pdppg; 1114 1115 pdppg = PHYS_TO_VM_PAGE(*pmap_pml4e(pmap, va) & VPTE_FRAME); 1116 pmap_unwire_pte_hold(pmap, va, pdppg); 1117 } 1118 1119 /* 1120 * This was our last hold, the page had better be unwired 1121 * after we decrement wire_count. 1122 * 1123 * FUTURE NOTE: shared page directory page could result in 1124 * multiple wire counts. 1125 */ 1126 vm_page_unhold(m); 1127 --m->wire_count; 1128 KKASSERT(m->wire_count == 0); 1129 atomic_add_int(&vmstats.v_wire_count, -1); 1130 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 1131 vm_page_flash(m); 1132 vm_page_free_zero(m); 1133 return 1; 1134 } else { 1135 KKASSERT(m->hold_count > 1); 1136 vm_page_unhold(m); 1137 vm_page_wakeup(m); 1138 return 0; 1139 } 1140 } 1141 1142 static __inline int 1143 pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m) 1144 { 1145 KKASSERT(m->hold_count > 0); 1146 if (m->hold_count > 1) { 1147 vm_page_unhold(m); 1148 return 0; 1149 } else { 1150 return _pmap_unwire_pte_hold(pmap, va, m); 1151 } 1152 } 1153 1154 /* 1155 * After removing a page table entry, this routine is used to 1156 * conditionally free the page, and manage the hold/wire counts. 1157 */ 1158 static int 1159 pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) 1160 { 1161 /* JG Use FreeBSD/amd64 or FreeBSD/i386 ptepde approaches? */ 1162 vm_pindex_t ptepindex; 1163 1164 ASSERT_LWKT_TOKEN_HELD(vm_object_token(pmap->pm_pteobj)); 1165 1166 if (mpte == NULL) { 1167 /* 1168 * page table pages in the kernel_pmap are not managed. 1169 */ 1170 if (pmap == &kernel_pmap) 1171 return(0); 1172 ptepindex = pmap_pde_pindex(va); 1173 if (pmap->pm_ptphint && 1174 (pmap->pm_ptphint->pindex == ptepindex)) { 1175 mpte = pmap->pm_ptphint; 1176 } else { 1177 mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex); 1178 pmap->pm_ptphint = mpte; 1179 vm_page_wakeup(mpte); 1180 } 1181 } 1182 1183 return pmap_unwire_pte_hold(pmap, va, mpte); 1184 } 1185 1186 /* 1187 * Initialize pmap0/vmspace0 . Since process 0 never enters user mode we 1188 * just dummy it up so it works well enough for fork(). 1189 * 1190 * In DragonFly, process pmaps may only be used to manipulate user address 1191 * space, never kernel address space. 1192 */ 1193 void 1194 pmap_pinit0(struct pmap *pmap) 1195 { 1196 pmap_pinit(pmap); 1197 } 1198 1199 /* 1200 * Initialize a preallocated and zeroed pmap structure, 1201 * such as one in a vmspace structure. 
1202 */ 1203 void 1204 pmap_pinit(struct pmap *pmap) 1205 { 1206 vm_page_t ptdpg; 1207 1208 /* 1209 * No need to allocate page table space yet but we do need a valid 1210 * page directory table. 1211 */ 1212 if (pmap->pm_pml4 == NULL) { 1213 pmap->pm_pml4 = 1214 (pml4_entry_t *)kmem_alloc_pageable(&kernel_map, PAGE_SIZE); 1215 } 1216 1217 /* 1218 * Allocate an object for the ptes 1219 */ 1220 if (pmap->pm_pteobj == NULL) 1221 pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, NUPDE + NUPDPE + PML4PML4I + 1); 1222 1223 /* 1224 * Allocate the page directory page, unless we already have 1225 * one cached. If we used the cached page the wire_count will 1226 * already be set appropriately. 1227 */ 1228 if ((ptdpg = pmap->pm_pdirm) == NULL) { 1229 ptdpg = vm_page_grab(pmap->pm_pteobj, 1230 NUPDE + NUPDPE + PML4PML4I, 1231 VM_ALLOC_NORMAL | VM_ALLOC_RETRY | 1232 VM_ALLOC_ZERO); 1233 pmap->pm_pdirm = ptdpg; 1234 vm_page_flag_clear(ptdpg, PG_MAPPED); 1235 vm_page_wire(ptdpg); 1236 vm_page_wakeup(ptdpg); 1237 pmap_kenter((vm_offset_t)pmap->pm_pml4, VM_PAGE_TO_PHYS(ptdpg)); 1238 } 1239 pmap->pm_count = 1; 1240 CPUMASK_ASSZERO(pmap->pm_active); 1241 pmap->pm_ptphint = NULL; 1242 TAILQ_INIT(&pmap->pm_pvlist); 1243 TAILQ_INIT(&pmap->pm_pvlist_free); 1244 spin_init(&pmap->pm_spin, "pmapinit"); 1245 lwkt_token_init(&pmap->pm_token, "pmap_tok"); 1246 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1247 pmap->pm_stats.resident_count = 1; 1248 } 1249 1250 /* 1251 * Clean up a pmap structure so it can be physically freed. This routine 1252 * is called by the vmspace dtor function. A great deal of pmap data is 1253 * left passively mapped to improve vmspace management so we have a bit 1254 * of cleanup work to do here. 1255 * 1256 * No requirements. 1257 */ 1258 void 1259 pmap_puninit(pmap_t pmap) 1260 { 1261 vm_page_t p; 1262 1263 KKASSERT(CPUMASK_TESTZERO(pmap->pm_active)); 1264 if ((p = pmap->pm_pdirm) != NULL) { 1265 KKASSERT(pmap->pm_pml4 != NULL); 1266 pmap_kremove((vm_offset_t)pmap->pm_pml4); 1267 vm_page_busy_wait(p, FALSE, "pgpun"); 1268 p->wire_count--; 1269 atomic_add_int(&vmstats.v_wire_count, -1); 1270 vm_page_free_zero(p); 1271 pmap->pm_pdirm = NULL; 1272 } 1273 if (pmap->pm_pml4) { 1274 kmem_free(&kernel_map, (vm_offset_t)pmap->pm_pml4, PAGE_SIZE); 1275 pmap->pm_pml4 = NULL; 1276 } 1277 if (pmap->pm_pteobj) { 1278 vm_object_deallocate(pmap->pm_pteobj); 1279 pmap->pm_pteobj = NULL; 1280 } 1281 } 1282 1283 /* 1284 * Wire in kernel global address entries. To avoid a race condition 1285 * between pmap initialization and pmap_growkernel, this procedure 1286 * adds the pmap to the master list (which growkernel scans to update), 1287 * then copies the template. 1288 * 1289 * In a virtual kernel there are no kernel global address entries. 1290 * 1291 * No requirements. 1292 */ 1293 void 1294 pmap_pinit2(struct pmap *pmap) 1295 { 1296 spin_lock(&pmap_spin); 1297 TAILQ_INSERT_TAIL(&pmap_list, pmap, pm_pmnode); 1298 spin_unlock(&pmap_spin); 1299 } 1300 1301 /* 1302 * Attempt to release and free a vm_page in a pmap. Returns 1 on success, 1303 * 0 on failure (if the procedure had to sleep). 1304 * 1305 * When asked to remove the page directory page itself, we actually just 1306 * leave it cached so we do not have to incur the SMP inval overhead of 1307 * removing the kernel mapping. pmap_puninit() will take care of it. 1308 */ 1309 static int 1310 pmap_release_free_page(struct pmap *pmap, vm_page_t p) 1311 { 1312 /* 1313 * This code optimizes the case of freeing non-busy 1314 * page-table pages. 
	 * Those pages are zero now, and might as well be placed directly
	 * into the zero queue.
	 */
	if (vm_page_busy_try(p, FALSE)) {
		vm_page_sleep_busy(p, FALSE, "pmaprl");
		return 0;
	}

	/*
	 * Remove the page table page from the processes address space.
	 */
	if (p->pindex == NUPDE + NUPDPE + PML4PML4I) {
		/*
		 * We are the pml4 table itself.
		 */
		/* XXX anything to do here? */
	} else if (p->pindex >= (NUPDE + NUPDPE)) {
		/*
		 * We are a PDP page.
		 * We look for the PML4 entry that points to us.
		 */
		vm_page_t m4 = vm_page_lookup(pmap->pm_pteobj,
					      NUPDE + NUPDPE + PML4PML4I);
		KKASSERT(m4 != NULL);
		pml4_entry_t *pml4 =
			(pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m4));
		int idx = (p->pindex - (NUPDE + NUPDPE)) % NPML4EPG;
		KKASSERT(pml4[idx] != 0);
		pml4[idx] = 0;
		m4->hold_count--;
		/* JG What about wire_count? */
	} else if (p->pindex >= NUPDE) {
		/*
		 * We are a PD page.
		 * We look for the PDP entry that points to us.
		 */
		vm_page_t m3 = vm_page_lookup(pmap->pm_pteobj,
				NUPDE + NUPDPE + (p->pindex - NUPDE) / NPDPEPG);
		KKASSERT(m3 != NULL);
		pdp_entry_t *pdp =
			(pdp_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m3));
		int idx = (p->pindex - NUPDE) % NPDPEPG;
		KKASSERT(pdp[idx] != 0);
		pdp[idx] = 0;
		m3->hold_count--;
		/* JG What about wire_count? */
	} else {
		/*
		 * We are a PT page.
		 * We look for the PD entry that points to us.
		 */
		vm_page_t m2 = vm_page_lookup(pmap->pm_pteobj,
					      NUPDE + p->pindex / NPDEPG);
		KKASSERT(m2 != NULL);
		pd_entry_t *pd =
			(pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m2));
		int idx = p->pindex % NPDEPG;
		pd[idx] = 0;
		m2->hold_count--;
		/* JG What about wire_count? */
	}
	KKASSERT(pmap->pm_stats.resident_count > 0);
	--pmap->pm_stats.resident_count;

	if (p->hold_count) {
		panic("pmap_release: freeing held pt page "
		      "pmap=%p pg=%p dmap=%p pi=%ld {%ld,%ld,%ld}",
		      pmap, p, (void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(p)),
		      p->pindex, NUPDE, NUPDPE, PML4PML4I);
	}
	if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex))
		pmap->pm_ptphint = NULL;

	/*
	 * We leave the top-level page table page cached, wired, and mapped in
	 * the pmap until the dtor function (pmap_puninit()) gets called.
	 * However, still clean it up.
	 */
	if (p->pindex == NUPDE + NUPDPE + PML4PML4I) {
		bzero(pmap->pm_pml4, PAGE_SIZE);
		vm_page_wakeup(p);
	} else {
		abort();
		p->wire_count--;
		atomic_add_int(&vmstats.v_wire_count, -1);
		/* JG eventually revert to using vm_page_free_zero() */
		vm_page_free(p);
	}
	return 1;
}

/*
 * This routine is called if the page table page is not
 * mapped correctly.
 */
static vm_page_t
_pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex)
{
	vm_page_t m, pdppg, pdpg;

	/*
	 * Find or fabricate a new pagetable page.  Handle allocation
	 * races by checking m->valid.
	 */
	m = vm_page_grab(pmap->pm_pteobj, ptepindex,
			 VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY);

	KASSERT(m->queue == PQ_NONE,
		("_pmap_allocpte: %p->queue != PQ_NONE", m));

	/*
	 * Increment the hold count for the page we will be returning to
	 * the caller.
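	 *
	 * Note: the page is both held (the reference handed back to the
	 * caller) and wired (so it is never paged out); the matching
	 * releases are done in pmap_unwire_pte_hold().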
1420 */ 1421 m->hold_count++; 1422 vm_page_wire(m); 1423 1424 /* 1425 * Map the pagetable page into the process address space, if 1426 * it isn't already there. 1427 */ 1428 ++pmap->pm_stats.resident_count; 1429 1430 if (ptepindex >= (NUPDE + NUPDPE)) { 1431 pml4_entry_t *pml4; 1432 vm_pindex_t pml4index; 1433 1434 /* Wire up a new PDP page */ 1435 pml4index = ptepindex - (NUPDE + NUPDPE); 1436 pml4 = &pmap->pm_pml4[pml4index]; 1437 *pml4 = VM_PAGE_TO_PHYS(m) | 1438 VPTE_RW | VPTE_V | VPTE_U | 1439 VPTE_A | VPTE_M; 1440 } else if (ptepindex >= NUPDE) { 1441 vm_pindex_t pml4index; 1442 vm_pindex_t pdpindex; 1443 pml4_entry_t *pml4; 1444 pdp_entry_t *pdp; 1445 1446 /* Wire up a new PD page */ 1447 pdpindex = ptepindex - NUPDE; 1448 pml4index = pdpindex >> NPML4EPGSHIFT; 1449 1450 pml4 = &pmap->pm_pml4[pml4index]; 1451 if ((*pml4 & VPTE_V) == 0) { 1452 /* Have to allocate a new PDP page, recurse */ 1453 if (_pmap_allocpte(pmap, NUPDE + NUPDPE + pml4index) 1454 == NULL) { 1455 --m->wire_count; 1456 vm_page_free(m); 1457 return (NULL); 1458 } 1459 } else { 1460 /* Add reference to the PDP page */ 1461 pdppg = PHYS_TO_VM_PAGE(*pml4 & VPTE_FRAME); 1462 pdppg->hold_count++; 1463 } 1464 pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & VPTE_FRAME); 1465 1466 /* Now find the pdp page */ 1467 pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; 1468 KKASSERT(*pdp == 0); /* JG DEBUG64 */ 1469 *pdp = VM_PAGE_TO_PHYS(m) | VPTE_RW | VPTE_V | VPTE_U | 1470 VPTE_A | VPTE_M; 1471 } else { 1472 vm_pindex_t pml4index; 1473 vm_pindex_t pdpindex; 1474 pml4_entry_t *pml4; 1475 pdp_entry_t *pdp; 1476 pd_entry_t *pd; 1477 1478 /* Wire up a new PT page */ 1479 pdpindex = ptepindex >> NPDPEPGSHIFT; 1480 pml4index = pdpindex >> NPML4EPGSHIFT; 1481 1482 /* First, find the pdp and check that its valid. */ 1483 pml4 = &pmap->pm_pml4[pml4index]; 1484 if ((*pml4 & VPTE_V) == 0) { 1485 /* We miss a PDP page. We ultimately need a PD page. 1486 * Recursively allocating a PD page will allocate 1487 * the missing PDP page and will also allocate 1488 * the PD page we need. 1489 */ 1490 /* Have to allocate a new PD page, recurse */ 1491 if (_pmap_allocpte(pmap, NUPDE + pdpindex) 1492 == NULL) { 1493 --m->wire_count; 1494 vm_page_free(m); 1495 return (NULL); 1496 } 1497 pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & VPTE_FRAME); 1498 pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; 1499 } else { 1500 pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & VPTE_FRAME); 1501 pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; 1502 if ((*pdp & VPTE_V) == 0) { 1503 /* Have to allocate a new PD page, recurse */ 1504 if (_pmap_allocpte(pmap, NUPDE + pdpindex) 1505 == NULL) { 1506 --m->wire_count; 1507 vm_page_free(m); 1508 return (NULL); 1509 } 1510 } else { 1511 /* Add reference to the PD page */ 1512 pdpg = PHYS_TO_VM_PAGE(*pdp & VPTE_FRAME); 1513 pdpg->hold_count++; 1514 } 1515 } 1516 pd = (pd_entry_t *)PHYS_TO_DMAP(*pdp & VPTE_FRAME); 1517 1518 /* Now we know where the page directory page is */ 1519 pd = &pd[ptepindex & ((1ul << NPDEPGSHIFT) - 1)]; 1520 KKASSERT(*pd == 0); /* JG DEBUG64 */ 1521 *pd = VM_PAGE_TO_PHYS(m) | VPTE_RW | VPTE_V | VPTE_U | 1522 VPTE_A | VPTE_M; 1523 } 1524 1525 /* 1526 * Set the page table hint 1527 */ 1528 pmap->pm_ptphint = m; 1529 vm_page_flag_set(m, PG_MAPPED); 1530 vm_page_wakeup(m); 1531 1532 return m; 1533 } 1534 1535 /* 1536 * Determine the page table page required to access the VA in the pmap 1537 * and allocate it if necessary. Return a held vm_page_t for the page. 1538 * 1539 * Only used with user pmaps. 
1540 */ 1541 static vm_page_t 1542 pmap_allocpte(pmap_t pmap, vm_offset_t va) 1543 { 1544 vm_pindex_t ptepindex; 1545 pd_entry_t *pd; 1546 vm_page_t m; 1547 1548 ASSERT_LWKT_TOKEN_HELD(vm_object_token(pmap->pm_pteobj)); 1549 1550 /* 1551 * Calculate pagetable page index 1552 */ 1553 ptepindex = pmap_pde_pindex(va); 1554 1555 /* 1556 * Get the page directory entry 1557 */ 1558 pd = pmap_pde(pmap, va); 1559 1560 /* 1561 * This supports switching from a 2MB page to a 1562 * normal 4K page. 1563 */ 1564 if (pd != NULL && (*pd & (VPTE_PS | VPTE_V)) == (VPTE_PS | VPTE_V)) { 1565 panic("no promotion/demotion yet"); 1566 *pd = 0; 1567 pd = NULL; 1568 /*cpu_invltlb();*/ 1569 /*smp_invltlb();*/ 1570 } 1571 1572 /* 1573 * If the page table page is mapped, we just increment the 1574 * hold count, and activate it. 1575 */ 1576 if (pd != NULL && (*pd & VPTE_V) != 0) { 1577 /* YYY hint is used here on i386 */ 1578 m = pmap_page_lookup(pmap->pm_pteobj, ptepindex); 1579 pmap->pm_ptphint = m; 1580 vm_page_hold(m); 1581 vm_page_wakeup(m); 1582 return m; 1583 } 1584 /* 1585 * Here if the pte page isn't mapped, or if it has been deallocated. 1586 */ 1587 return _pmap_allocpte(pmap, ptepindex); 1588 } 1589 1590 1591 /*************************************************** 1592 * Pmap allocation/deallocation routines. 1593 ***************************************************/ 1594 1595 /* 1596 * Release any resources held by the given physical map. 1597 * Called when a pmap initialized by pmap_pinit is being released. 1598 * Should only be called if the map contains no valid mappings. 1599 * 1600 * Caller must hold pmap->pm_token 1601 */ 1602 static int pmap_release_callback(struct vm_page *p, void *data); 1603 1604 void 1605 pmap_release(struct pmap *pmap) 1606 { 1607 vm_object_t object = pmap->pm_pteobj; 1608 struct rb_vm_page_scan_info info; 1609 1610 KKASSERT(pmap != &kernel_pmap); 1611 1612 lwkt_gettoken(&vm_token); 1613 #if defined(DIAGNOSTIC) 1614 if (object->ref_count != 1) 1615 panic("pmap_release: pteobj reference count != 1"); 1616 #endif 1617 1618 info.pmap = pmap; 1619 info.object = object; 1620 1621 KASSERT(CPUMASK_TESTZERO(pmap->pm_active), 1622 ("pmap %p still active! %016jx", 1623 pmap, 1624 (uintmax_t)CPUMASK_LOWMASK(pmap->pm_active))); 1625 1626 spin_lock(&pmap_spin); 1627 TAILQ_REMOVE(&pmap_list, pmap, pm_pmnode); 1628 spin_unlock(&pmap_spin); 1629 1630 vm_object_hold(object); 1631 do { 1632 info.error = 0; 1633 info.mpte = NULL; 1634 info.limit = object->generation; 1635 1636 vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL, 1637 pmap_release_callback, &info); 1638 if (info.error == 0 && info.mpte) { 1639 if (!pmap_release_free_page(pmap, info.mpte)) 1640 info.error = 1; 1641 } 1642 } while (info.error); 1643 vm_object_drop(object); 1644 lwkt_reltoken(&vm_token); 1645 } 1646 1647 static int 1648 pmap_release_callback(struct vm_page *p, void *data) 1649 { 1650 struct rb_vm_page_scan_info *info = data; 1651 1652 if (p->pindex == NUPDE + NUPDPE + PML4PML4I) { 1653 info->mpte = p; 1654 return(0); 1655 } 1656 if (!pmap_release_free_page(info->pmap, p)) { 1657 info->error = 1; 1658 return(-1); 1659 } 1660 if (info->object->generation != info->limit) { 1661 info->error = 1; 1662 return(-1); 1663 } 1664 return(0); 1665 } 1666 1667 /* 1668 * Grow the number of kernel page table entries, if needed. 1669 * 1670 * No requirements. 
1671 */ 1672 void 1673 pmap_growkernel(vm_offset_t kstart, vm_offset_t kend) 1674 { 1675 vm_offset_t addr; 1676 vm_paddr_t paddr; 1677 vm_offset_t ptppaddr; 1678 vm_page_t nkpg; 1679 pd_entry_t *pde, newpdir; 1680 pdp_entry_t newpdp; 1681 1682 addr = kend; 1683 1684 vm_object_hold(kptobj); 1685 if (kernel_vm_end == 0) { 1686 kernel_vm_end = KvaStart; 1687 nkpt = 0; 1688 while ((*pmap_pde(&kernel_pmap, kernel_vm_end) & VPTE_V) != 0) { 1689 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1690 nkpt++; 1691 if (kernel_vm_end - 1 >= kernel_map.max_offset) { 1692 kernel_vm_end = kernel_map.max_offset; 1693 break; 1694 } 1695 } 1696 } 1697 addr = roundup2(addr, PAGE_SIZE * NPTEPG); 1698 if (addr - 1 >= kernel_map.max_offset) 1699 addr = kernel_map.max_offset; 1700 while (kernel_vm_end < addr) { 1701 pde = pmap_pde(&kernel_pmap, kernel_vm_end); 1702 if (pde == NULL) { 1703 /* We need a new PDP entry */ 1704 nkpg = vm_page_alloc(kptobj, nkpt, 1705 VM_ALLOC_NORMAL | VM_ALLOC_SYSTEM 1706 | VM_ALLOC_INTERRUPT); 1707 if (nkpg == NULL) { 1708 panic("pmap_growkernel: no memory to " 1709 "grow kernel"); 1710 } 1711 paddr = VM_PAGE_TO_PHYS(nkpg); 1712 pmap_zero_page(paddr); 1713 newpdp = (pdp_entry_t)(paddr | 1714 VPTE_V | VPTE_RW | VPTE_U | 1715 VPTE_A | VPTE_M); 1716 *pmap_pdpe(&kernel_pmap, kernel_vm_end) = newpdp; 1717 nkpt++; 1718 continue; /* try again */ 1719 } 1720 if ((*pde & VPTE_V) != 0) { 1721 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & 1722 ~(PAGE_SIZE * NPTEPG - 1); 1723 if (kernel_vm_end - 1 >= kernel_map.max_offset) { 1724 kernel_vm_end = kernel_map.max_offset; 1725 break; 1726 } 1727 continue; 1728 } 1729 1730 /* 1731 * This index is bogus, but out of the way 1732 */ 1733 nkpg = vm_page_alloc(kptobj, nkpt, 1734 VM_ALLOC_NORMAL | 1735 VM_ALLOC_SYSTEM | 1736 VM_ALLOC_INTERRUPT); 1737 if (nkpg == NULL) 1738 panic("pmap_growkernel: no memory to grow kernel"); 1739 1740 vm_page_wire(nkpg); 1741 ptppaddr = VM_PAGE_TO_PHYS(nkpg); 1742 pmap_zero_page(ptppaddr); 1743 newpdir = (pd_entry_t)(ptppaddr | 1744 VPTE_V | VPTE_RW | VPTE_U | 1745 VPTE_A | VPTE_M); 1746 *pmap_pde(&kernel_pmap, kernel_vm_end) = newpdir; 1747 nkpt++; 1748 1749 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & 1750 ~(PAGE_SIZE * NPTEPG - 1); 1751 if (kernel_vm_end - 1 >= kernel_map.max_offset) { 1752 kernel_vm_end = kernel_map.max_offset; 1753 break; 1754 } 1755 } 1756 vm_object_drop(kptobj); 1757 } 1758 1759 /* 1760 * Add a reference to the specified pmap. 1761 * 1762 * No requirements. 1763 */ 1764 void 1765 pmap_reference(pmap_t pmap) 1766 { 1767 if (pmap) { 1768 lwkt_gettoken(&vm_token); 1769 ++pmap->pm_count; 1770 lwkt_reltoken(&vm_token); 1771 } 1772 } 1773 1774 /************************************************************************ 1775 * VMSPACE MANAGEMENT * 1776 ************************************************************************ 1777 * 1778 * The VMSPACE management we do in our virtual kernel must be reflected 1779 * in the real kernel. This is accomplished by making vmspace system 1780 * calls to the real kernel. 
1781 */ 1782 void 1783 cpu_vmspace_alloc(struct vmspace *vm) 1784 { 1785 int r; 1786 void *rp; 1787 vpte_t vpte; 1788 1789 /* 1790 * If VMM enable, don't do nothing, we 1791 * are able to use real page tables 1792 */ 1793 if (vmm_enabled) 1794 return; 1795 1796 #define USER_SIZE (VM_MAX_USER_ADDRESS - VM_MIN_USER_ADDRESS) 1797 1798 if (vmspace_create(&vm->vm_pmap, 0, NULL) < 0) 1799 panic("vmspace_create() failed"); 1800 1801 rp = vmspace_mmap(&vm->vm_pmap, VM_MIN_USER_ADDRESS, USER_SIZE, 1802 PROT_READ|PROT_WRITE, 1803 MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED, 1804 MemImageFd, 0); 1805 if (rp == MAP_FAILED) 1806 panic("vmspace_mmap: failed"); 1807 vmspace_mcontrol(&vm->vm_pmap, VM_MIN_USER_ADDRESS, USER_SIZE, 1808 MADV_NOSYNC, 0); 1809 vpte = VM_PAGE_TO_PHYS(vmspace_pmap(vm)->pm_pdirm) | VPTE_RW | VPTE_V | VPTE_U; 1810 r = vmspace_mcontrol(&vm->vm_pmap, VM_MIN_USER_ADDRESS, USER_SIZE, 1811 MADV_SETMAP, vpte); 1812 if (r < 0) 1813 panic("vmspace_mcontrol: failed"); 1814 } 1815 1816 void 1817 cpu_vmspace_free(struct vmspace *vm) 1818 { 1819 /* 1820 * If VMM enable, don't do nothing, we 1821 * are able to use real page tables 1822 */ 1823 if (vmm_enabled) 1824 return; 1825 1826 if (vmspace_destroy(&vm->vm_pmap) < 0) 1827 panic("vmspace_destroy() failed"); 1828 } 1829 1830 /*************************************************** 1831 * page management routines. 1832 ***************************************************/ 1833 1834 /* 1835 * free the pv_entry back to the free list. This function may be 1836 * called from an interrupt. 1837 */ 1838 static __inline void 1839 free_pv_entry(pv_entry_t pv) 1840 { 1841 pv_entry_count--; 1842 KKASSERT(pv_entry_count >= 0); 1843 zfree(pvzone, pv); 1844 } 1845 1846 /* 1847 * get a new pv_entry, allocating a block from the system 1848 * when needed. This function may be called from an interrupt. 1849 */ 1850 static pv_entry_t 1851 get_pv_entry(void) 1852 { 1853 pv_entry_count++; 1854 if (pv_entry_high_water && 1855 (pv_entry_count > pv_entry_high_water) && 1856 (pmap_pagedaemon_waken == 0)) { 1857 pmap_pagedaemon_waken = 1; 1858 wakeup(&vm_pages_needed); 1859 } 1860 return zalloc(pvzone); 1861 } 1862 1863 /* 1864 * This routine is very drastic, but can save the system 1865 * in a pinch. 1866 * 1867 * No requirements. 1868 */ 1869 void 1870 pmap_collect(void) 1871 { 1872 int i; 1873 vm_page_t m; 1874 static int warningdone=0; 1875 1876 if (pmap_pagedaemon_waken == 0) 1877 return; 1878 lwkt_gettoken(&vm_token); 1879 pmap_pagedaemon_waken = 0; 1880 1881 if (warningdone < 5) { 1882 kprintf("pmap_collect: collecting pv entries -- " 1883 "suggest increasing PMAP_SHPGPERPROC\n"); 1884 warningdone++; 1885 } 1886 1887 for (i = 0; i < vm_page_array_size; i++) { 1888 m = &vm_page_array[i]; 1889 if (m->wire_count || m->hold_count) 1890 continue; 1891 if (vm_page_busy_try(m, TRUE) == 0) { 1892 if (m->wire_count == 0 && m->hold_count == 0) { 1893 pmap_remove_all(m); 1894 } 1895 vm_page_wakeup(m); 1896 } 1897 } 1898 lwkt_reltoken(&vm_token); 1899 } 1900 1901 1902 /* 1903 * If it is the first entry on the list, it is actually 1904 * in the header and we must copy the following entry up 1905 * to the header. Otherwise we must search the list for 1906 * the entry. In either case we free the now unused entry. 1907 * 1908 * caller must hold vm_token. 
1909 */ 1910 static int 1911 pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va) 1912 { 1913 pv_entry_t pv; 1914 int rtval; 1915 1916 if (m->md.pv_list_count < pmap->pm_stats.resident_count) { 1917 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 1918 if (pmap == pv->pv_pmap && va == pv->pv_va) 1919 break; 1920 } 1921 } else { 1922 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) { 1923 if (va == pv->pv_va) 1924 break; 1925 } 1926 } 1927 1928 /* 1929 * Note that pv_ptem is NULL if the page table page itself is not 1930 * managed, even if the page being removed IS managed. 1931 */ 1932 rtval = 0; 1933 /* JGXXX When can 'pv' be NULL? */ 1934 if (pv) { 1935 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1936 m->md.pv_list_count--; 1937 atomic_add_int(&m->object->agg_pv_list_count, -1); 1938 KKASSERT(m->md.pv_list_count >= 0); 1939 if (TAILQ_EMPTY(&m->md.pv_list)) 1940 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 1941 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 1942 ++pmap->pm_generation; 1943 KKASSERT(pmap->pm_pteobj != NULL); 1944 vm_object_hold(pmap->pm_pteobj); 1945 rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); 1946 vm_object_drop(pmap->pm_pteobj); 1947 free_pv_entry(pv); 1948 } 1949 return rtval; 1950 } 1951 1952 /* 1953 * Create a pv entry for page at pa for (pmap, va). If the page table page 1954 * holding the VA is managed, mpte will be non-NULL. 1955 */ 1956 static void 1957 pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m) 1958 { 1959 pv_entry_t pv; 1960 1961 crit_enter(); 1962 pv = get_pv_entry(); 1963 pv->pv_va = va; 1964 pv->pv_pmap = pmap; 1965 pv->pv_ptem = mpte; 1966 1967 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); 1968 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 1969 m->md.pv_list_count++; 1970 atomic_add_int(&m->object->agg_pv_list_count, 1); 1971 1972 crit_exit(); 1973 } 1974 1975 /* 1976 * pmap_remove_pte: do the things to unmap a page in a process 1977 */ 1978 static int 1979 pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va) 1980 { 1981 pt_entry_t oldpte; 1982 vm_page_t m; 1983 1984 oldpte = pmap_inval_loadandclear(ptq, pmap, va); 1985 if (oldpte & VPTE_WIRED) 1986 --pmap->pm_stats.wired_count; 1987 KKASSERT(pmap->pm_stats.wired_count >= 0); 1988 1989 #if 0 1990 /* 1991 * Machines that don't support invlpg, also don't support 1992 * PG_G. XXX PG_G is disabled for SMP so don't worry about 1993 * the SMP case. 1994 */ 1995 if (oldpte & PG_G) 1996 cpu_invlpg((void *)va); 1997 #endif 1998 KKASSERT(pmap->pm_stats.resident_count > 0); 1999 --pmap->pm_stats.resident_count; 2000 if (oldpte & VPTE_MANAGED) { 2001 m = PHYS_TO_VM_PAGE(oldpte); 2002 if (oldpte & VPTE_M) { 2003 #if defined(PMAP_DIAGNOSTIC) 2004 if (pmap_nw_modified(oldpte)) { 2005 kprintf("pmap_remove: modified page not " 2006 "writable: va: 0x%lx, pte: 0x%lx\n", 2007 va, oldpte); 2008 } 2009 #endif 2010 if (pmap_track_modified(pmap, va)) 2011 vm_page_dirty(m); 2012 } 2013 if (oldpte & VPTE_A) 2014 vm_page_flag_set(m, PG_REFERENCED); 2015 return pmap_remove_entry(pmap, m, va); 2016 } else { 2017 return pmap_unuse_pt(pmap, va, NULL); 2018 } 2019 2020 return 0; 2021 } 2022 2023 /* 2024 * pmap_remove_page: 2025 * 2026 * Remove a single page from a process address space. 2027 * 2028 * This function may not be called from an interrupt if the pmap is 2029 * not kernel_pmap. 
2030 */ 2031 static void 2032 pmap_remove_page(struct pmap *pmap, vm_offset_t va) 2033 { 2034 pt_entry_t *pte; 2035 2036 pte = pmap_pte(pmap, va); 2037 if (pte == NULL) 2038 return; 2039 if ((*pte & VPTE_V) == 0) 2040 return; 2041 pmap_remove_pte(pmap, pte, va); 2042 } 2043 2044 /* 2045 * Remove the given range of addresses from the specified map. 2046 * 2047 * It is assumed that the start and end are properly rounded to 2048 * the page size. 2049 * 2050 * This function may not be called from an interrupt if the pmap is 2051 * not kernel_pmap. 2052 * 2053 * No requirements. 2054 */ 2055 void 2056 pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) 2057 { 2058 vm_offset_t va_next; 2059 pml4_entry_t *pml4e; 2060 pdp_entry_t *pdpe; 2061 pd_entry_t ptpaddr, *pde; 2062 pt_entry_t *pte; 2063 2064 if (pmap == NULL) 2065 return; 2066 2067 vm_object_hold(pmap->pm_pteobj); 2068 lwkt_gettoken(&vm_token); 2069 KKASSERT(pmap->pm_stats.resident_count >= 0); 2070 if (pmap->pm_stats.resident_count == 0) { 2071 lwkt_reltoken(&vm_token); 2072 vm_object_drop(pmap->pm_pteobj); 2073 return; 2074 } 2075 2076 /* 2077 * special handling of removing one page. a very 2078 * common operation and easy to short circuit some 2079 * code. 2080 */ 2081 if (sva + PAGE_SIZE == eva) { 2082 pde = pmap_pde(pmap, sva); 2083 if (pde && (*pde & VPTE_PS) == 0) { 2084 pmap_remove_page(pmap, sva); 2085 lwkt_reltoken(&vm_token); 2086 vm_object_drop(pmap->pm_pteobj); 2087 return; 2088 } 2089 } 2090 2091 for (; sva < eva; sva = va_next) { 2092 pml4e = pmap_pml4e(pmap, sva); 2093 if ((*pml4e & VPTE_V) == 0) { 2094 va_next = (sva + NBPML4) & ~PML4MASK; 2095 if (va_next < sva) 2096 va_next = eva; 2097 continue; 2098 } 2099 2100 pdpe = pmap_pml4e_to_pdpe(pml4e, sva); 2101 if ((*pdpe & VPTE_V) == 0) { 2102 va_next = (sva + NBPDP) & ~PDPMASK; 2103 if (va_next < sva) 2104 va_next = eva; 2105 continue; 2106 } 2107 2108 /* 2109 * Calculate index for next page table. 2110 */ 2111 va_next = (sva + NBPDR) & ~PDRMASK; 2112 if (va_next < sva) 2113 va_next = eva; 2114 2115 pde = pmap_pdpe_to_pde(pdpe, sva); 2116 ptpaddr = *pde; 2117 2118 /* 2119 * Weed out invalid mappings. 2120 */ 2121 if (ptpaddr == 0) 2122 continue; 2123 2124 /* 2125 * Check for large page. 2126 */ 2127 if ((ptpaddr & VPTE_PS) != 0) { 2128 /* JG FreeBSD has more complex treatment here */ 2129 KKASSERT(*pde != 0); 2130 pmap_inval_pde(pde, pmap, sva); 2131 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 2132 continue; 2133 } 2134 2135 /* 2136 * Limit our scan to either the end of the va represented 2137 * by the current page table page, or to the end of the 2138 * range being removed. 2139 */ 2140 if (va_next > eva) 2141 va_next = eva; 2142 2143 /* 2144 * NOTE: pmap_remove_pte() can block. 2145 */ 2146 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, 2147 sva += PAGE_SIZE) { 2148 if (*pte == 0) 2149 continue; 2150 if (pmap_remove_pte(pmap, pte, sva)) 2151 break; 2152 } 2153 } 2154 lwkt_reltoken(&vm_token); 2155 vm_object_drop(pmap->pm_pteobj); 2156 } 2157 2158 /* 2159 * Removes this physical page from all physical maps in which it resides. 2160 * Reflects back modify bits to the pager. 2161 * 2162 * This routine may not be called from an interrupt. 2163 * 2164 * No requirements. 2165 */ 2166 static void 2167 pmap_remove_all(vm_page_t m) 2168 { 2169 pt_entry_t *pte, tpte; 2170 pv_entry_t pv; 2171 2172 #if defined(PMAP_DIAGNOSTIC) 2173 /* 2174 * XXX this makes pmap_page_protect(NONE) illegal for non-managed 2175 * pages! 
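 * (pmap_page_protect(m, VM_PROT_NONE) is the request that funnels into
 * pmap_remove_all(); see pmap_page_protect() later in this file.)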
2176 */ 2177 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { 2178 panic("pmap_page_protect: illegal for unmanaged page, va: 0x%08llx", (long long)VM_PAGE_TO_PHYS(m)); 2179 } 2180 #endif 2181 2182 lwkt_gettoken(&vm_token); 2183 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 2184 KKASSERT(pv->pv_pmap->pm_stats.resident_count > 0); 2185 --pv->pv_pmap->pm_stats.resident_count; 2186 2187 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2188 KKASSERT(pte != NULL); 2189 2190 tpte = pmap_inval_loadandclear(pte, pv->pv_pmap, pv->pv_va); 2191 if (tpte & VPTE_WIRED) 2192 pv->pv_pmap->pm_stats.wired_count--; 2193 KKASSERT(pv->pv_pmap->pm_stats.wired_count >= 0); 2194 2195 if (tpte & VPTE_A) 2196 vm_page_flag_set(m, PG_REFERENCED); 2197 2198 /* 2199 * Update the vm_page_t clean and reference bits. 2200 */ 2201 if (tpte & VPTE_M) { 2202 #if defined(PMAP_DIAGNOSTIC) 2203 if (pmap_nw_modified(tpte)) { 2204 kprintf( 2205 "pmap_remove_all: modified page not writable: va: 0x%lx, pte: 0x%lx\n", 2206 pv->pv_va, tpte); 2207 } 2208 #endif 2209 if (pmap_track_modified(pv->pv_pmap, pv->pv_va)) 2210 vm_page_dirty(m); 2211 } 2212 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2213 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); 2214 ++pv->pv_pmap->pm_generation; 2215 m->md.pv_list_count--; 2216 atomic_add_int(&m->object->agg_pv_list_count, -1); 2217 KKASSERT(m->md.pv_list_count >= 0); 2218 if (TAILQ_EMPTY(&m->md.pv_list)) 2219 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 2220 vm_object_hold(pv->pv_pmap->pm_pteobj); 2221 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); 2222 vm_object_drop(pv->pv_pmap->pm_pteobj); 2223 free_pv_entry(pv); 2224 } 2225 KKASSERT((m->flags & (PG_MAPPED|PG_WRITEABLE)) == 0); 2226 lwkt_reltoken(&vm_token); 2227 } 2228 2229 /* 2230 * Set the physical protection on the specified range of this map 2231 * as requested. 2232 * 2233 * This function may not be called from an interrupt if the map is 2234 * not the kernel_pmap. 2235 * 2236 * No requirements. 2237 */ 2238 void 2239 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 2240 { 2241 vm_offset_t va_next; 2242 pml4_entry_t *pml4e; 2243 pdp_entry_t *pdpe; 2244 pd_entry_t ptpaddr, *pde; 2245 pt_entry_t *pte; 2246 2247 /* JG review for NX */ 2248 2249 if (pmap == NULL) 2250 return; 2251 2252 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 2253 pmap_remove(pmap, sva, eva); 2254 return; 2255 } 2256 2257 if (prot & VM_PROT_WRITE) 2258 return; 2259 2260 lwkt_gettoken(&vm_token); 2261 2262 for (; sva < eva; sva = va_next) { 2263 2264 pml4e = pmap_pml4e(pmap, sva); 2265 if ((*pml4e & VPTE_V) == 0) { 2266 va_next = (sva + NBPML4) & ~PML4MASK; 2267 if (va_next < sva) 2268 va_next = eva; 2269 continue; 2270 } 2271 2272 pdpe = pmap_pml4e_to_pdpe(pml4e, sva); 2273 if ((*pdpe & VPTE_V) == 0) { 2274 va_next = (sva + NBPDP) & ~PDPMASK; 2275 if (va_next < sva) 2276 va_next = eva; 2277 continue; 2278 } 2279 2280 va_next = (sva + NBPDR) & ~PDRMASK; 2281 if (va_next < sva) 2282 va_next = eva; 2283 2284 pde = pmap_pdpe_to_pde(pdpe, sva); 2285 ptpaddr = *pde; 2286 2287 /* 2288 * Check for large page. 2289 */ 2290 if ((ptpaddr & VPTE_PS) != 0) { 2291 /* JG correct? */ 2292 pmap_clean_pde(pde, pmap, sva); 2293 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 2294 continue; 2295 } 2296 2297 /* 2298 * Weed out invalid mappings. Note: we assume that the page 2299 * directory table is always allocated, and in kernel virtual. 
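 * (A pde of zero simply means no page table page is present for this
 * 2MB range, so there is nothing to protect.)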
2300 */ 2301 if (ptpaddr == 0) 2302 continue; 2303 2304 if (va_next > eva) 2305 va_next = eva; 2306 2307 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, 2308 sva += PAGE_SIZE) { 2309 pt_entry_t pbits; 2310 vm_page_t m; 2311 2312 /* 2313 * Clean managed pages and also check the accessed 2314 * bit. Just remove write perms for unmanaged 2315 * pages. Be careful of races, turning off write 2316 * access will force a fault rather then setting 2317 * the modified bit at an unexpected time. 2318 */ 2319 if (*pte & VPTE_MANAGED) { 2320 pbits = pmap_clean_pte(pte, pmap, sva); 2321 m = NULL; 2322 if (pbits & VPTE_A) { 2323 m = PHYS_TO_VM_PAGE(pbits & VPTE_FRAME); 2324 vm_page_flag_set(m, PG_REFERENCED); 2325 atomic_clear_long(pte, VPTE_A); 2326 } 2327 if (pbits & VPTE_M) { 2328 if (pmap_track_modified(pmap, sva)) { 2329 if (m == NULL) 2330 m = PHYS_TO_VM_PAGE(pbits & VPTE_FRAME); 2331 vm_page_dirty(m); 2332 } 2333 } 2334 } else { 2335 pbits = pmap_setro_pte(pte, pmap, sva); 2336 } 2337 } 2338 } 2339 lwkt_reltoken(&vm_token); 2340 } 2341 2342 /* 2343 * Enter a managed page into a pmap. If the page is not wired related pmap 2344 * data can be destroyed at any time for later demand-operation. 2345 * 2346 * Insert the vm_page (m) at virtual address (v) in (pmap), with the 2347 * specified protection, and wire the mapping if requested. 2348 * 2349 * NOTE: This routine may not lazy-evaluate or lose information. The 2350 * page must actually be inserted into the given map NOW. 2351 * 2352 * NOTE: When entering a page at a KVA address, the pmap must be the 2353 * kernel_pmap. 2354 * 2355 * No requirements. 2356 */ 2357 void 2358 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 2359 boolean_t wired, vm_map_entry_t entry __unused) 2360 { 2361 vm_paddr_t pa; 2362 pd_entry_t *pde; 2363 pt_entry_t *pte; 2364 vm_paddr_t opa; 2365 pt_entry_t origpte, newpte; 2366 vm_page_t mpte; 2367 2368 if (pmap == NULL) 2369 return; 2370 2371 va = trunc_page(va); 2372 2373 vm_object_hold(pmap->pm_pteobj); 2374 lwkt_gettoken(&vm_token); 2375 2376 /* 2377 * Get the page table page. The kernel_pmap's page table pages 2378 * are preallocated and have no associated vm_page_t. 2379 */ 2380 if (pmap == &kernel_pmap) 2381 mpte = NULL; 2382 else 2383 mpte = pmap_allocpte(pmap, va); 2384 2385 pde = pmap_pde(pmap, va); 2386 if (pde != NULL && (*pde & VPTE_V) != 0) { 2387 if ((*pde & VPTE_PS) != 0) 2388 panic("pmap_enter: attempted pmap_enter on 2MB page"); 2389 pte = pmap_pde_to_pte(pde, va); 2390 } else { 2391 panic("pmap_enter: invalid page directory va=%#lx", va); 2392 } 2393 2394 KKASSERT(pte != NULL); 2395 /* 2396 * Deal with races on the original mapping (though don't worry 2397 * about VPTE_A races) by cleaning it. This will force a fault 2398 * if an attempt is made to write to the page. 2399 */ 2400 pa = VM_PAGE_TO_PHYS(m); 2401 origpte = pmap_clean_pte(pte, pmap, va); 2402 opa = origpte & VPTE_FRAME; 2403 2404 if (origpte & VPTE_PS) 2405 panic("pmap_enter: attempted pmap_enter on 2MB page"); 2406 2407 /* 2408 * Mapping has not changed, must be protection or wiring change. 2409 */ 2410 if (origpte && (opa == pa)) { 2411 /* 2412 * Wiring change, just update stats. We don't worry about 2413 * wiring PT pages as they remain resident as long as there 2414 * are valid mappings in them. Hence, if a user page is wired, 2415 * the PT page will be also. 
2416 */ 2417 if (wired && ((origpte & VPTE_WIRED) == 0)) 2418 ++pmap->pm_stats.wired_count; 2419 else if (!wired && (origpte & VPTE_WIRED)) 2420 --pmap->pm_stats.wired_count; 2421 2422 /* 2423 * Remove the extra pte reference. Note that we cannot 2424 * optimize the RO->RW case because we have adjusted the 2425 * wiring count above and may need to adjust the wiring 2426 * bits below. 2427 */ 2428 if (mpte) 2429 mpte->hold_count--; 2430 2431 /* 2432 * We might be turning off write access to the page, 2433 * so we go ahead and sense modify status. 2434 */ 2435 if (origpte & VPTE_MANAGED) { 2436 if ((origpte & VPTE_M) && 2437 pmap_track_modified(pmap, va)) { 2438 vm_page_t om; 2439 om = PHYS_TO_VM_PAGE(opa); 2440 vm_page_dirty(om); 2441 } 2442 pa |= VPTE_MANAGED; 2443 KKASSERT(m->flags & PG_MAPPED); 2444 } 2445 goto validate; 2446 } 2447 /* 2448 * Mapping has changed, invalidate old range and fall through to 2449 * handle validating new mapping. 2450 */ 2451 if (opa) { 2452 int err; 2453 err = pmap_remove_pte(pmap, pte, va); 2454 if (err) 2455 panic("pmap_enter: pte vanished, va: 0x%lx", va); 2456 } 2457 2458 /* 2459 * Enter on the PV list if part of our managed memory. Note that we 2460 * raise IPL while manipulating pv_table since pmap_enter can be 2461 * called at interrupt time. 2462 */ 2463 if (pmap_initialized && 2464 (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { 2465 pmap_insert_entry(pmap, va, mpte, m); 2466 pa |= VPTE_MANAGED; 2467 vm_page_flag_set(m, PG_MAPPED); 2468 } 2469 2470 /* 2471 * Increment counters 2472 */ 2473 ++pmap->pm_stats.resident_count; 2474 if (wired) 2475 pmap->pm_stats.wired_count++; 2476 2477 validate: 2478 /* 2479 * Now validate mapping with desired protection/wiring. 2480 */ 2481 newpte = (pt_entry_t) (pa | pte_prot(pmap, prot) | VPTE_V | VPTE_U); 2482 2483 if (wired) 2484 newpte |= VPTE_WIRED; 2485 // if (pmap != &kernel_pmap) 2486 newpte |= VPTE_U; 2487 2488 /* 2489 * If the mapping or permission bits are different from the 2490 * (now cleaned) original pte, an update is needed. We've 2491 * already downgraded or invalidated the page so all we have 2492 * to do now is update the bits. 2493 * 2494 * XXX should we synchronize RO->RW changes to avoid another 2495 * fault? 2496 */ 2497 if ((origpte & ~(VPTE_RW|VPTE_M|VPTE_A)) != newpte) { 2498 *pte = newpte | VPTE_A; 2499 if (newpte & VPTE_RW) 2500 vm_page_flag_set(m, PG_WRITEABLE); 2501 } 2502 KKASSERT((newpte & VPTE_MANAGED) == 0 || (m->flags & PG_MAPPED)); 2503 lwkt_reltoken(&vm_token); 2504 vm_object_drop(pmap->pm_pteobj); 2505 } 2506 2507 /* 2508 * This code works like pmap_enter() but assumes VM_PROT_READ and not-wired. 2509 * 2510 * Currently this routine may only be used on user pmaps, not kernel_pmap. 2511 * 2512 * No requirements. 2513 */ 2514 void 2515 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m) 2516 { 2517 pt_entry_t *pte; 2518 vm_paddr_t pa; 2519 vm_page_t mpte; 2520 vm_pindex_t ptepindex; 2521 pd_entry_t *ptepa; 2522 2523 KKASSERT(pmap != &kernel_pmap); 2524 2525 KKASSERT(va >= VM_MIN_USER_ADDRESS && va < VM_MAX_USER_ADDRESS); 2526 2527 /* 2528 * Calculate pagetable page index 2529 */ 2530 ptepindex = pmap_pde_pindex(va); 2531 2532 vm_object_hold(pmap->pm_pteobj); 2533 lwkt_gettoken(&vm_token); 2534 2535 do { 2536 /* 2537 * Get the page directory entry 2538 */ 2539 ptepa = pmap_pde(pmap, va); 2540 2541 /* 2542 * If the page table page is mapped, we just increment 2543 * the hold count, and activate it. 
2544 */ 2545 if (ptepa && (*ptepa & VPTE_V) != 0) { 2546 if (*ptepa & VPTE_PS) 2547 panic("pmap_enter_quick: unexpected mapping into 2MB page"); 2548 if (pmap->pm_ptphint && 2549 (pmap->pm_ptphint->pindex == ptepindex)) { 2550 mpte = pmap->pm_ptphint; 2551 } else { 2552 mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex); 2553 pmap->pm_ptphint = mpte; 2554 vm_page_wakeup(mpte); 2555 } 2556 if (mpte) 2557 mpte->hold_count++; 2558 } else { 2559 mpte = _pmap_allocpte(pmap, ptepindex); 2560 } 2561 } while (mpte == NULL); 2562 2563 /* 2564 * Ok, now that the page table page has been validated, get the pte. 2565 * If the pte is already mapped undo mpte's hold_count and 2566 * just return. 2567 */ 2568 pte = pmap_pte(pmap, va); 2569 if (*pte & VPTE_V) { 2570 KKASSERT(mpte != NULL); 2571 pmap_unwire_pte_hold(pmap, va, mpte); 2572 pa = VM_PAGE_TO_PHYS(m); 2573 KKASSERT(((*pte ^ pa) & VPTE_FRAME) == 0); 2574 lwkt_reltoken(&vm_token); 2575 vm_object_drop(pmap->pm_pteobj); 2576 return; 2577 } 2578 2579 /* 2580 * Enter on the PV list if part of our managed memory 2581 */ 2582 if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { 2583 pmap_insert_entry(pmap, va, mpte, m); 2584 vm_page_flag_set(m, PG_MAPPED); 2585 } 2586 2587 /* 2588 * Increment counters 2589 */ 2590 ++pmap->pm_stats.resident_count; 2591 2592 pa = VM_PAGE_TO_PHYS(m); 2593 2594 /* 2595 * Now validate mapping with RO protection 2596 */ 2597 if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) 2598 *pte = (vpte_t)pa | VPTE_V | VPTE_U; 2599 else 2600 *pte = (vpte_t)pa | VPTE_V | VPTE_U | VPTE_MANAGED; 2601 /*pmap_inval_add(&info, pmap, va); shouldn't be needed 0->valid */ 2602 /*pmap_inval_flush(&info); don't need for vkernel */ 2603 lwkt_reltoken(&vm_token); 2604 vm_object_drop(pmap->pm_pteobj); 2605 } 2606 2607 /* 2608 * Make a temporary mapping for a physical address. This is only intended 2609 * to be used for panic dumps. 2610 * 2611 * The caller is responsible for calling smp_invltlb(). 2612 */ 2613 void * 2614 pmap_kenter_temporary(vm_paddr_t pa, long i) 2615 { 2616 pmap_kenter_quick(crashdumpmap + (i * PAGE_SIZE), pa); 2617 return ((void *)crashdumpmap); 2618 } 2619 2620 #define MAX_INIT_PT (96) 2621 2622 /* 2623 * This routine preloads the ptes for a given object into the specified pmap. 2624 * This eliminates the blast of soft faults on process startup and 2625 * immediately after an mmap. 2626 * 2627 * No requirements. 2628 */ 2629 static int pmap_object_init_pt_callback(vm_page_t p, void *data); 2630 2631 void 2632 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot, 2633 vm_object_t object, vm_pindex_t pindex, 2634 vm_size_t size, int limit) 2635 { 2636 struct rb_vm_page_scan_info info; 2637 struct lwp *lp; 2638 vm_size_t psize; 2639 2640 /* 2641 * We can't preinit if read access isn't set or there is no pmap 2642 * or object. 
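 * For example, a request made without VM_PROT_READ or with a NULL
 * object is silently ignored rather than treated as an error, since
 * preloading is purely an optimization.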
2643 */ 2644 if ((prot & VM_PROT_READ) == 0 || pmap == NULL || object == NULL) 2645 return; 2646 2647 /* 2648 * We can't preinit if the pmap is not the current pmap 2649 */ 2650 lp = curthread->td_lwp; 2651 if (lp == NULL || pmap != vmspace_pmap(lp->lwp_vmspace)) 2652 return; 2653 2654 psize = x86_64_btop(size); 2655 2656 if ((object->type != OBJT_VNODE) || 2657 ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) && 2658 (object->resident_page_count > MAX_INIT_PT))) { 2659 return; 2660 } 2661 2662 if (psize + pindex > object->size) { 2663 if (object->size < pindex) 2664 return; 2665 psize = object->size - pindex; 2666 } 2667 2668 if (psize == 0) 2669 return; 2670 2671 /* 2672 * Use a red-black scan to traverse the requested range and load 2673 * any valid pages found into the pmap. 2674 * 2675 * We cannot safely scan the object's memq unless we are in a 2676 * critical section since interrupts can remove pages from objects. 2677 */ 2678 info.start_pindex = pindex; 2679 info.end_pindex = pindex + psize - 1; 2680 info.limit = limit; 2681 info.mpte = NULL; 2682 info.addr = addr; 2683 info.pmap = pmap; 2684 2685 vm_object_hold_shared(object); 2686 vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp, 2687 pmap_object_init_pt_callback, &info); 2688 vm_object_drop(object); 2689 } 2690 2691 static 2692 int 2693 pmap_object_init_pt_callback(vm_page_t p, void *data) 2694 { 2695 struct rb_vm_page_scan_info *info = data; 2696 vm_pindex_t rel_index; 2697 /* 2698 * don't allow an madvise to blow away our really 2699 * free pages allocating pv entries. 2700 */ 2701 if ((info->limit & MAP_PREFAULT_MADVISE) && 2702 vmstats.v_free_count < vmstats.v_free_reserved) { 2703 return(-1); 2704 } 2705 2706 /* 2707 * Ignore list markers and ignore pages we cannot instantly 2708 * busy (while holding the object token). 2709 */ 2710 if (p->flags & PG_MARKER) 2711 return 0; 2712 if (vm_page_busy_try(p, TRUE)) 2713 return 0; 2714 if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && 2715 (p->flags & PG_FICTITIOUS) == 0) { 2716 if ((p->queue - p->pc) == PQ_CACHE) 2717 vm_page_deactivate(p); 2718 rel_index = p->pindex - info->start_pindex; 2719 pmap_enter_quick(info->pmap, 2720 info->addr + x86_64_ptob(rel_index), p); 2721 } 2722 vm_page_wakeup(p); 2723 return(0); 2724 } 2725 2726 /* 2727 * Return TRUE if the pmap is in shape to trivially 2728 * pre-fault the specified address. 2729 * 2730 * Returns FALSE if it would be non-trivial or if a 2731 * pte is already loaded into the slot. 2732 * 2733 * No requirements. 2734 */ 2735 int 2736 pmap_prefault_ok(pmap_t pmap, vm_offset_t addr) 2737 { 2738 pt_entry_t *pte; 2739 pd_entry_t *pde; 2740 int ret; 2741 2742 lwkt_gettoken(&vm_token); 2743 pde = pmap_pde(pmap, addr); 2744 if (pde == NULL || *pde == 0) { 2745 ret = 0; 2746 } else { 2747 pte = pmap_pde_to_pte(pde, addr); 2748 ret = (*pte) ? 0 : 1; 2749 } 2750 lwkt_reltoken(&vm_token); 2751 return (ret); 2752 } 2753 2754 /* 2755 * Change the wiring attribute for a map/virtual-address pair. 2756 * 2757 * The mapping must already exist in the pmap. 2758 * No other requirements. 
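 *
 * Illustrative only (hypothetical caller): wiring and later unwiring a
 * single user page would look roughly like
 *
 *	pmap_change_wiring(pmap, va, TRUE, entry);
 *	...
 *	pmap_change_wiring(pmap, va, FALSE, entry);
 *
 * where the pte for va is already valid in the pmap.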
2759 */ 2760 void 2761 pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired, 2762 vm_map_entry_t entry __unused) 2763 { 2764 pt_entry_t *pte; 2765 2766 if (pmap == NULL) 2767 return; 2768 2769 lwkt_gettoken(&vm_token); 2770 pte = pmap_pte(pmap, va); 2771 2772 if (wired && !pmap_pte_w(pte)) 2773 pmap->pm_stats.wired_count++; 2774 else if (!wired && pmap_pte_w(pte)) 2775 pmap->pm_stats.wired_count--; 2776 2777 /* 2778 * Wiring is not a hardware characteristic so there is no need to 2779 * invalidate the TLB. However, in an SMP environment we must use 2780 * a locked bus cycle to update the pte (if we are not using 2781 * the pmap_inval_*() API that is)... it's ok to do this for simple 2782 * wiring changes. 2783 */ 2784 if (wired) 2785 atomic_set_long(pte, VPTE_WIRED); 2786 else 2787 atomic_clear_long(pte, VPTE_WIRED); 2788 lwkt_reltoken(&vm_token); 2789 } 2790 2791 /* 2792 * Copy the range specified by src_addr/len 2793 * from the source map to the range dst_addr/len 2794 * in the destination map. 2795 * 2796 * This routine is only advisory and need not do anything. 2797 */ 2798 void 2799 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, 2800 vm_size_t len, vm_offset_t src_addr) 2801 { 2802 /* 2803 * XXX BUGGY. Among other things srcmpte is assumed to remain 2804 * valid through blocking calls, and that's just not going to 2805 * be the case. 2806 * 2807 * FIXME! 2808 */ 2809 return; 2810 } 2811 2812 /* 2813 * pmap_zero_page: 2814 * 2815 * Zero the specified physical page. 2816 * 2817 * This function may be called from an interrupt and no locking is 2818 * required. 2819 */ 2820 void 2821 pmap_zero_page(vm_paddr_t phys) 2822 { 2823 vm_offset_t va = PHYS_TO_DMAP(phys); 2824 2825 bzero((void *)va, PAGE_SIZE); 2826 } 2827 2828 /* 2829 * pmap_zero_page_area: 2830 * 2831 * Zero part of a physical page by mapping it into memory and clearing 2832 * its contents with bzero. 2833 * 2834 * off and size may not cover an area beyond a single hardware page. 2835 */ 2836 void 2837 pmap_zero_page_area(vm_paddr_t phys, int off, int size) 2838 { 2839 crit_enter(); 2840 vm_offset_t virt = PHYS_TO_DMAP(phys); 2841 bzero((char *)virt + off, size); 2842 crit_exit(); 2843 } 2844 2845 /* 2846 * pmap_copy_page: 2847 * 2848 * Copy the physical page from the source PA to the target PA. 2849 * This function may be called from an interrupt. No locking 2850 * is required. 2851 */ 2852 void 2853 pmap_copy_page(vm_paddr_t src, vm_paddr_t dst) 2854 { 2855 vm_offset_t src_virt, dst_virt; 2856 2857 crit_enter(); 2858 src_virt = PHYS_TO_DMAP(src); 2859 dst_virt = PHYS_TO_DMAP(dst); 2860 bcopy((void *)src_virt, (void *)dst_virt, PAGE_SIZE); 2861 crit_exit(); 2862 } 2863 2864 /* 2865 * pmap_copy_page_frag: 2866 * 2867 * Copy a fragment of the physical page from the source PA to the 2868 * target PA. This function may be called from an interrupt. No 2869 * locking is required. 2870 */ 2871 void 2872 pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes) 2873 { 2874 vm_offset_t src_virt, dst_virt; 2875 2876 crit_enter(); 2877 src_virt = PHYS_TO_DMAP(src); 2878 dst_virt = PHYS_TO_DMAP(dst); 2879 bcopy((char *)src_virt + (src & PAGE_MASK), 2880 (char *)dst_virt + (dst & PAGE_MASK), 2881 bytes); 2882 crit_exit(); 2883 } 2884 2885 /* 2886 * Returns true if the pmap's pv is one of the first 16 pvs linked to 2887 * from this page. This count may be changed upwards or downwards 2888 * in the future; it is only necessary that true be returned for a small 2889 * subset of pmaps for proper page aging.
2890 * 2891 * No other requirements. 2892 */ 2893 boolean_t 2894 pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 2895 { 2896 pv_entry_t pv; 2897 int loops = 0; 2898 2899 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 2900 return FALSE; 2901 2902 crit_enter(); 2903 lwkt_gettoken(&vm_token); 2904 2905 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2906 if (pv->pv_pmap == pmap) { 2907 lwkt_reltoken(&vm_token); 2908 crit_exit(); 2909 return TRUE; 2910 } 2911 loops++; 2912 if (loops >= 16) 2913 break; 2914 } 2915 lwkt_reltoken(&vm_token); 2916 crit_exit(); 2917 return (FALSE); 2918 } 2919 2920 /* 2921 * Remove all pages from specified address space this aids process 2922 * exit speeds. Also, this code is special cased for current 2923 * process only, but can have the more generic (and slightly slower) 2924 * mode enabled. This is much faster than pmap_remove in the case 2925 * of running down an entire address space. 2926 * 2927 * No other requirements. 2928 */ 2929 void 2930 pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 2931 { 2932 pt_entry_t *pte, tpte; 2933 pv_entry_t pv, npv; 2934 vm_page_t m; 2935 int save_generation; 2936 2937 if (pmap->pm_pteobj) 2938 vm_object_hold(pmap->pm_pteobj); 2939 lwkt_gettoken(&vm_token); 2940 2941 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { 2942 if (pv->pv_va >= eva || pv->pv_va < sva) { 2943 npv = TAILQ_NEXT(pv, pv_plist); 2944 continue; 2945 } 2946 2947 KKASSERT(pmap == pv->pv_pmap); 2948 2949 pte = pmap_pte(pmap, pv->pv_va); 2950 2951 /* 2952 * We cannot remove wired pages from a process' mapping 2953 * at this time 2954 */ 2955 if (*pte & VPTE_WIRED) { 2956 npv = TAILQ_NEXT(pv, pv_plist); 2957 continue; 2958 } 2959 tpte = pmap_inval_loadandclear(pte, pmap, pv->pv_va); 2960 2961 m = PHYS_TO_VM_PAGE(tpte & VPTE_FRAME); 2962 2963 KASSERT(m < &vm_page_array[vm_page_array_size], 2964 ("pmap_remove_pages: bad tpte %lx", tpte)); 2965 2966 KKASSERT(pmap->pm_stats.resident_count > 0); 2967 --pmap->pm_stats.resident_count; 2968 2969 /* 2970 * Update the vm_page_t clean and reference bits. 2971 */ 2972 if (tpte & VPTE_M) { 2973 vm_page_dirty(m); 2974 } 2975 2976 npv = TAILQ_NEXT(pv, pv_plist); 2977 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 2978 save_generation = ++pmap->pm_generation; 2979 2980 m->md.pv_list_count--; 2981 atomic_add_int(&m->object->agg_pv_list_count, -1); 2982 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2983 if (TAILQ_EMPTY(&m->md.pv_list)) 2984 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 2985 2986 pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem); 2987 free_pv_entry(pv); 2988 2989 /* 2990 * Restart the scan if we blocked during the unuse or free 2991 * calls and other removals were made. 2992 */ 2993 if (save_generation != pmap->pm_generation) { 2994 kprintf("Warning: pmap_remove_pages race-A avoided\n"); 2995 npv = TAILQ_FIRST(&pmap->pm_pvlist); 2996 } 2997 } 2998 lwkt_reltoken(&vm_token); 2999 if (pmap->pm_pteobj) 3000 vm_object_drop(pmap->pm_pteobj); 3001 } 3002 3003 /* 3004 * pmap_testbit tests bits in active mappings of a VM page. 3005 */ 3006 static boolean_t 3007 pmap_testbit(vm_page_t m, int bit) 3008 { 3009 pv_entry_t pv; 3010 pt_entry_t *pte; 3011 3012 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 3013 return FALSE; 3014 3015 if (TAILQ_FIRST(&m->md.pv_list) == NULL) 3016 return FALSE; 3017 3018 crit_enter(); 3019 3020 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3021 /* 3022 * if the bit being tested is the modified bit, then 3023 * mark clean_map and ptes as never 3024 * modified. 
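 * (In practice this means mappings whose accessed/modified state is
 * not tracked, as determined by pmap_track_modified(), are skipped.)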
3025 */ 3026 if (bit & (VPTE_A|VPTE_M)) { 3027 if (!pmap_track_modified(pv->pv_pmap, pv->pv_va)) 3028 continue; 3029 } 3030 3031 #if defined(PMAP_DIAGNOSTIC) 3032 if (pv->pv_pmap == NULL) { 3033 kprintf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va); 3034 continue; 3035 } 3036 #endif 3037 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 3038 if (*pte & bit) { 3039 crit_exit(); 3040 return TRUE; 3041 } 3042 } 3043 crit_exit(); 3044 return (FALSE); 3045 } 3046 3047 /* 3048 * This routine is used to clear bits in ptes. Certain bits require special 3049 * handling, in particular (on virtual kernels) the VPTE_M (modify) bit. 3050 * 3051 * This routine is only called with certain VPTE_* bit combinations. 3052 */ 3053 static __inline void 3054 pmap_clearbit(vm_page_t m, int bit) 3055 { 3056 pv_entry_t pv; 3057 pt_entry_t *pte; 3058 pt_entry_t pbits; 3059 3060 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 3061 return; 3062 3063 crit_enter(); 3064 3065 /* 3066 * Loop over all current mappings setting/clearing as appropos If 3067 * setting RO do we need to clear the VAC? 3068 */ 3069 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3070 /* 3071 * don't write protect pager mappings 3072 */ 3073 if (bit == VPTE_RW) { 3074 if (!pmap_track_modified(pv->pv_pmap, pv->pv_va)) 3075 continue; 3076 } 3077 3078 #if defined(PMAP_DIAGNOSTIC) 3079 if (pv->pv_pmap == NULL) { 3080 kprintf("Null pmap (cb) at va: 0x%lx\n", pv->pv_va); 3081 continue; 3082 } 3083 #endif 3084 3085 /* 3086 * Careful here. We can use a locked bus instruction to 3087 * clear VPTE_A or VPTE_M safely but we need to synchronize 3088 * with the target cpus when we mess with VPTE_RW. 3089 * 3090 * On virtual kernels we must force a new fault-on-write 3091 * in the real kernel if we clear the Modify bit ourselves, 3092 * otherwise the real kernel will not get a new fault and 3093 * will never set our Modify bit again. 3094 */ 3095 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 3096 if (*pte & bit) { 3097 if (bit == VPTE_RW) { 3098 /* 3099 * We must also clear VPTE_M when clearing 3100 * VPTE_RW 3101 */ 3102 pbits = pmap_clean_pte(pte, pv->pv_pmap, 3103 pv->pv_va); 3104 if (pbits & VPTE_M) 3105 vm_page_dirty(m); 3106 } else if (bit == VPTE_M) { 3107 /* 3108 * We do not have to make the page read-only 3109 * when clearing the Modify bit. The real 3110 * kernel will make the real PTE read-only 3111 * or otherwise detect the write and set 3112 * our VPTE_M again simply by us invalidating 3113 * the real kernel VA for the pmap (as we did 3114 * above). This allows the real kernel to 3115 * handle the write fault without forwarding 3116 * the fault to us. 3117 */ 3118 atomic_clear_long(pte, VPTE_M); 3119 } else if ((bit & (VPTE_RW|VPTE_M)) == (VPTE_RW|VPTE_M)) { 3120 /* 3121 * We've been asked to clear W & M, I guess 3122 * the caller doesn't want us to update 3123 * the dirty status of the VM page. 3124 */ 3125 pmap_clean_pte(pte, pv->pv_pmap, pv->pv_va); 3126 } else { 3127 /* 3128 * We've been asked to clear bits that do 3129 * not interact with hardware. 3130 */ 3131 atomic_clear_long(pte, bit); 3132 } 3133 } 3134 } 3135 crit_exit(); 3136 } 3137 3138 /* 3139 * Lower the permission for all mappings to a given page. 3140 * 3141 * No other requirements. 3142 */ 3143 void 3144 pmap_page_protect(vm_page_t m, vm_prot_t prot) 3145 { 3146 /* JG NX support? 
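 *
 * A rough summary of the cases handled below (illustrative only):
 *
 *	pmap_page_protect(m, VM_PROT_READ)	write-protect all mappings
 *	pmap_page_protect(m, VM_PROT_NONE)	remove all mappings
 *
 * Requests that still include VM_PROT_WRITE are left alone.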
*/ 3147 if ((prot & VM_PROT_WRITE) == 0) { 3148 lwkt_gettoken(&vm_token); 3149 if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { 3150 pmap_clearbit(m, VPTE_RW); 3151 vm_page_flag_clear(m, PG_WRITEABLE); 3152 } else { 3153 pmap_remove_all(m); 3154 } 3155 lwkt_reltoken(&vm_token); 3156 } 3157 } 3158 3159 vm_paddr_t 3160 pmap_phys_address(vm_pindex_t ppn) 3161 { 3162 return (x86_64_ptob(ppn)); 3163 } 3164 3165 /* 3166 * Return a count of reference bits for a page, clearing those bits. 3167 * It is not necessary for every reference bit to be cleared, but it 3168 * is necessary that 0 only be returned when there are truly no 3169 * reference bits set. 3170 * 3171 * XXX: The exact number of bits to check and clear is a matter that 3172 * should be tested and standardized at some point in the future for 3173 * optimal aging of shared pages. 3174 * 3175 * No other requirements. 3176 */ 3177 int 3178 pmap_ts_referenced(vm_page_t m) 3179 { 3180 pv_entry_t pv, pvf, pvn; 3181 pt_entry_t *pte; 3182 int rtval = 0; 3183 3184 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 3185 return (rtval); 3186 3187 crit_enter(); 3188 lwkt_gettoken(&vm_token); 3189 3190 if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 3191 3192 pvf = pv; 3193 3194 do { 3195 pvn = TAILQ_NEXT(pv, pv_list); 3196 3197 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 3198 3199 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 3200 3201 if (!pmap_track_modified(pv->pv_pmap, pv->pv_va)) 3202 continue; 3203 3204 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 3205 3206 if (pte && (*pte & VPTE_A)) { 3207 atomic_clear_long(pte, VPTE_A); 3208 rtval++; 3209 if (rtval > 4) { 3210 break; 3211 } 3212 } 3213 } while ((pv = pvn) != NULL && pv != pvf); 3214 } 3215 lwkt_reltoken(&vm_token); 3216 crit_exit(); 3217 3218 return (rtval); 3219 } 3220 3221 /* 3222 * Return whether or not the specified physical page was modified 3223 * in any physical maps. 3224 * 3225 * No other requirements. 3226 */ 3227 boolean_t 3228 pmap_is_modified(vm_page_t m) 3229 { 3230 boolean_t res; 3231 3232 lwkt_gettoken(&vm_token); 3233 res = pmap_testbit(m, VPTE_M); 3234 lwkt_reltoken(&vm_token); 3235 return (res); 3236 } 3237 3238 /* 3239 * Clear the modify bits on the specified physical page. 3240 * 3241 * No other requirements. 3242 */ 3243 void 3244 pmap_clear_modify(vm_page_t m) 3245 { 3246 lwkt_gettoken(&vm_token); 3247 pmap_clearbit(m, VPTE_M); 3248 lwkt_reltoken(&vm_token); 3249 } 3250 3251 /* 3252 * Clear the reference bit on the specified physical page. 3253 * 3254 * No other requirements. 3255 */ 3256 void 3257 pmap_clear_reference(vm_page_t m) 3258 { 3259 lwkt_gettoken(&vm_token); 3260 pmap_clearbit(m, VPTE_A); 3261 lwkt_reltoken(&vm_token); 3262 } 3263 3264 /* 3265 * Miscellaneous support routines follow 3266 */ 3267 3268 static void 3269 i386_protection_init(void) 3270 { 3271 int *kp, prot; 3272 3273 kp = protection_codes; 3274 for (prot = 0; prot < 8; prot++) { 3275 if (prot & VM_PROT_READ) 3276 *kp |= 0; /* if it's VALID is readeable */ 3277 if (prot & VM_PROT_WRITE) 3278 *kp |= VPTE_RW; 3279 if (prot & VM_PROT_EXECUTE) 3280 *kp |= 0; /* if it's VALID is executable */ 3281 ++kp; 3282 } 3283 } 3284 3285 /* 3286 * Sets the memory attribute for the specified page. 3287 */ 3288 void 3289 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) 3290 { 3291 /* This is a vkernel, do nothing */ 3292 } 3293 3294 /* 3295 * Change the PAT attribute on an existing kernel memory map. Caller 3296 * must ensure that the virtual memory in question is not accessed 3297 * during the adjustment. 
3298 */ 3299 void 3300 pmap_change_attr(vm_offset_t va, vm_size_t count, int mode) 3301 { 3302 /* This is a vkernel, do nothing */ 3303 } 3304 3305 /* 3306 * Perform the pmap work for mincore 3307 * 3308 * No other requirements. 3309 */ 3310 int 3311 pmap_mincore(pmap_t pmap, vm_offset_t addr) 3312 { 3313 pt_entry_t *ptep, pte; 3314 vm_page_t m; 3315 int val = 0; 3316 3317 lwkt_gettoken(&vm_token); 3318 ptep = pmap_pte(pmap, addr); 3319 3320 if (ptep && (pte = *ptep) != 0) { 3321 vm_paddr_t pa; 3322 3323 val = MINCORE_INCORE; 3324 if ((pte & VPTE_MANAGED) == 0) 3325 goto done; 3326 3327 pa = pte & VPTE_FRAME; 3328 3329 m = PHYS_TO_VM_PAGE(pa); 3330 3331 /* 3332 * Modified by us 3333 */ 3334 if (pte & VPTE_M) 3335 val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; 3336 /* 3337 * Modified by someone 3338 */ 3339 else if (m->dirty || pmap_is_modified(m)) 3340 val |= MINCORE_MODIFIED_OTHER; 3341 /* 3342 * Referenced by us 3343 */ 3344 if (pte & VPTE_A) 3345 val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; 3346 3347 /* 3348 * Referenced by someone 3349 */ 3350 else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) { 3351 val |= MINCORE_REFERENCED_OTHER; 3352 vm_page_flag_set(m, PG_REFERENCED); 3353 } 3354 } 3355 done: 3356 lwkt_reltoken(&vm_token); 3357 return val; 3358 } 3359 3360 /* 3361 * Replace p->p_vmspace with a new one. If adjrefs is non-zero the new 3362 * vmspace will be ref'd and the old one will be deref'd. 3363 * 3364 * Caller must hold vmspace->vm_map.token for oldvm and newvm 3365 */ 3366 void 3367 pmap_replacevm(struct proc *p, struct vmspace *newvm, int adjrefs) 3368 { 3369 struct vmspace *oldvm; 3370 struct lwp *lp; 3371 3372 crit_enter(); 3373 oldvm = p->p_vmspace; 3374 if (oldvm != newvm) { 3375 if (adjrefs) 3376 vmspace_ref(newvm); 3377 p->p_vmspace = newvm; 3378 KKASSERT(p->p_nthreads == 1); 3379 lp = RB_ROOT(&p->p_lwp_tree); 3380 pmap_setlwpvm(lp, newvm); 3381 if (adjrefs) 3382 vmspace_rel(oldvm); 3383 } 3384 crit_exit(); 3385 } 3386 3387 /* 3388 * Set the vmspace for a LWP. The vmspace is almost universally set the 3389 * same as the process vmspace, but virtual kernels need to swap out contexts 3390 * on a per-lwp basis. 3391 */ 3392 void 3393 pmap_setlwpvm(struct lwp *lp, struct vmspace *newvm) 3394 { 3395 struct vmspace *oldvm; 3396 struct pmap *pmap; 3397 3398 oldvm = lp->lwp_vmspace; 3399 if (oldvm != newvm) { 3400 crit_enter(); 3401 lp->lwp_vmspace = newvm; 3402 if (curthread->td_lwp == lp) { 3403 pmap = vmspace_pmap(newvm); 3404 ATOMIC_CPUMASK_ORBIT(pmap->pm_active, mycpu->gd_cpuid); 3405 if (pmap->pm_active_lock & CPULOCK_EXCL) 3406 pmap_interlock_wait(newvm); 3407 #if defined(SWTCH_OPTIM_STATS) 3408 tlb_flush_count++; 3409 #endif 3410 pmap = vmspace_pmap(oldvm); 3411 ATOMIC_CPUMASK_NANDBIT(pmap->pm_active, 3412 mycpu->gd_cpuid); 3413 } 3414 crit_exit(); 3415 } 3416 } 3417 3418 /* 3419 * The swtch code tried to switch in a heavy weight process whos pmap 3420 * is locked by another cpu. We have to wait for the lock to clear before 3421 * the pmap can be used. 
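 *
 * Because this is the vkernel the wait loop cannot spin with interrupts
 * disabled the way a real kernel might; it simply calls pthread_yield()
 * to give up the host thread until the CPULOCK_EXCL bit clears.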
3422 */ 3423 void 3424 pmap_interlock_wait (struct vmspace *vm) 3425 { 3426 pmap_t pmap = vmspace_pmap(vm); 3427 3428 if (pmap->pm_active_lock & CPULOCK_EXCL) { 3429 crit_enter(); 3430 while (pmap->pm_active_lock & CPULOCK_EXCL) { 3431 cpu_ccfence(); 3432 pthread_yield(); 3433 } 3434 crit_exit(); 3435 } 3436 } 3437 3438 vm_offset_t 3439 pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) 3440 { 3441 3442 if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { 3443 return addr; 3444 } 3445 3446 addr = roundup2(addr, NBPDR); 3447 return addr; 3448 } 3449 3450 /* 3451 * Used by kmalloc/kfree, page already exists at va 3452 */ 3453 vm_page_t 3454 pmap_kvtom(vm_offset_t va) 3455 { 3456 vpte_t *ptep; 3457 3458 KKASSERT(va >= KvaStart && va < KvaEnd); 3459 ptep = vtopte(va); 3460 return(PHYS_TO_VM_PAGE(*ptep & PG_FRAME)); 3461 } 3462 3463 void 3464 pmap_object_init(vm_object_t object) 3465 { 3466 /* empty */ 3467 } 3468 3469 void 3470 pmap_object_free(vm_object_t object) 3471 { 3472 /* empty */ 3473 } 3474