/*
 * (MPSAFE)
 *
 * Copyright (c) 1991 Regents of the University of California.
 * Copyright (c) 1994 John S. Dyson
 * Copyright (c) 1994 David Greenman
 * Copyright (c) 2003 Peter Wemm
 * Copyright (c) 2005-2008 Alan L. Cox <alc@cs.rice.edu>
 * Copyright (c) 2008, 2009 The DragonFly Project.
 * Copyright (c) 2008, 2009 Jordan Gordeev.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from:   @(#)pmap.c	7.7 (Berkeley)	5/12/91
 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $
 */

/*
 * Manages physical address maps.
 */

#if JG
#include "opt_pmap.h"
#endif
#include "opt_msgbuf.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/msgbuf.h>
#include <sys/vmmeter.h>
#include <sys/mman.h>
#include <sys/vmspace.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_zone.h>

#include <sys/user.h>
#include <sys/thread2.h>
#include <sys/sysref2.h>
#include <sys/spinlock2.h>
#include <vm/vm_page2.h>

#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <machine/smp.h>
#include <machine/globaldata.h>
#include <machine/pmap.h>
#include <machine/pmap_inval.h>

#include <ddb/ddb.h>

#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <pthread.h>

#define PMAP_KEEP_PDIRS
#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC 1000
#endif

#if defined(DIAGNOSTIC)
#define PMAP_DIAGNOSTIC
#endif

#define MINPV 2048

#if !defined(PMAP_DIAGNOSTIC)
#define PMAP_INLINE __inline
#else
#define PMAP_INLINE
#endif

/*
 * Get PDEs and PTEs for user/kernel address space
 */
static pd_entry_t *pmap_pde(pmap_t pmap, vm_offset_t va);
#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])

#define pmap_pde_v(pte)		((*(pd_entry_t *)pte & VPTE_V) != 0)
#define pmap_pte_w(pte)		((*(pt_entry_t *)pte & VPTE_WIRED) != 0)
#define pmap_pte_m(pte)		((*(pt_entry_t *)pte & VPTE_M) != 0)
#define pmap_pte_u(pte)		((*(pt_entry_t *)pte & VPTE_A) != 0)
#define pmap_pte_v(pte)		((*(pt_entry_t *)pte & VPTE_V) != 0)

/*
 * Given a map and a machine independent protection code,
 * convert to a vax protection code.
 */
#define pte_prot(m, p)	\
	(protection_codes[p & (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)])
static int protection_codes[8];

struct pmap kernel_pmap;
static TAILQ_HEAD(,pmap) pmap_list = TAILQ_HEAD_INITIALIZER(pmap_list);

static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */

static vm_object_t kptobj;

static int nkpt;

static uint64_t KPDphys;	/* phys addr of kernel level 2 */
uint64_t KPDPphys;		/* phys addr of kernel level 3 */
uint64_t KPML4phys;		/* phys addr of kernel level 4 */

extern int vmm_enabled;
extern void *vkernel_stack;

/*
 * Data for the pv entry allocation mechanism
 */
static vm_zone_t pvzone;
static struct vm_zone pvzone_store;
static struct vm_object pvzone_obj;
static int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0;
static int pmap_pagedaemon_waken = 0;
static struct pv_entry *pvinit;

/*
 * All those kernel PT submaps that BSD is so fond of
 */
pt_entry_t *CMAP1 = NULL, *ptmmap;
caddr_t CADDR1 = NULL;
static pt_entry_t *msgbufmap;

uint64_t KPTphys;

static PMAP_INLINE void free_pv_entry (pv_entry_t pv);
static pv_entry_t get_pv_entry (void);
static void i386_protection_init (void);
static __inline void pmap_clearbit (vm_page_t m, int bit);

static void pmap_remove_all (vm_page_t m);
static int pmap_remove_pte (struct pmap *pmap, pt_entry_t *ptq,
		vm_offset_t sva);
static void pmap_remove_page (struct pmap *pmap, vm_offset_t va);
static int pmap_remove_entry (struct pmap *pmap, vm_page_t m,
		vm_offset_t va);
static boolean_t pmap_testbit (vm_page_t m, int bit);
static void pmap_insert_entry (pmap_t pmap, vm_offset_t va,
		vm_page_t mpte, vm_page_t m);

static vm_page_t pmap_allocpte (pmap_t pmap, vm_offset_t va);

static int pmap_release_free_page (pmap_t pmap, vm_page_t p);
static vm_page_t _pmap_allocpte (pmap_t pmap, vm_pindex_t ptepindex);
#if JGPMAP32
static pt_entry_t * pmap_pte_quick (pmap_t pmap, vm_offset_t va);
#endif
static vm_page_t pmap_page_lookup (vm_object_t object, vm_pindex_t pindex);
static int pmap_unuse_pt (pmap_t, vm_offset_t, vm_page_t);

/*
 * pmap_pte_quick:
 *
 * Super fast pmap_pte routine best used when scanning the pv lists.
 * This eliminates many coarse-grained invltlb calls.  Note that many of
 * the pv list scans are across different pmaps and it is very wasteful
 * to do an entire invltlb when checking a single mapping.
 *
 * Should only be called while in a critical section.
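 *
 * Illustrative usage sketch (not part of the original source), assuming
 * a caller walking a page's pv list:
 *
 *	crit_enter();
 *	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 *		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 *		...inspect or adjust *pte...
 *	}
 *	crit_exit();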
207 */ 208 #if JGPMAP32 209 static __inline pt_entry_t *pmap_pte(pmap_t pmap, vm_offset_t va); 210 211 static pt_entry_t * 212 pmap_pte_quick(pmap_t pmap, vm_offset_t va) 213 { 214 return pmap_pte(pmap, va); 215 } 216 #endif 217 218 /* Return a non-clipped PD index for a given VA */ 219 static __inline vm_pindex_t 220 pmap_pde_pindex(vm_offset_t va) 221 { 222 return va >> PDRSHIFT; 223 } 224 225 /* Return various clipped indexes for a given VA */ 226 static __inline vm_pindex_t 227 pmap_pte_index(vm_offset_t va) 228 { 229 return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1)); 230 } 231 232 static __inline vm_pindex_t 233 pmap_pde_index(vm_offset_t va) 234 { 235 return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1)); 236 } 237 238 static __inline vm_pindex_t 239 pmap_pdpe_index(vm_offset_t va) 240 { 241 return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1)); 242 } 243 244 static __inline vm_pindex_t 245 pmap_pml4e_index(vm_offset_t va) 246 { 247 return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1)); 248 } 249 250 /* Return a pointer to the PML4 slot that corresponds to a VA */ 251 static __inline pml4_entry_t * 252 pmap_pml4e(pmap_t pmap, vm_offset_t va) 253 { 254 return (&pmap->pm_pml4[pmap_pml4e_index(va)]); 255 } 256 257 /* Return a pointer to the PDP slot that corresponds to a VA */ 258 static __inline pdp_entry_t * 259 pmap_pml4e_to_pdpe(pml4_entry_t *pml4e, vm_offset_t va) 260 { 261 pdp_entry_t *pdpe; 262 263 pdpe = (pdp_entry_t *)PHYS_TO_DMAP(*pml4e & VPTE_FRAME); 264 return (&pdpe[pmap_pdpe_index(va)]); 265 } 266 267 /* Return a pointer to the PDP slot that corresponds to a VA */ 268 static __inline pdp_entry_t * 269 pmap_pdpe(pmap_t pmap, vm_offset_t va) 270 { 271 pml4_entry_t *pml4e; 272 273 pml4e = pmap_pml4e(pmap, va); 274 if ((*pml4e & VPTE_V) == 0) 275 return NULL; 276 return (pmap_pml4e_to_pdpe(pml4e, va)); 277 } 278 279 /* Return a pointer to the PD slot that corresponds to a VA */ 280 static __inline pd_entry_t * 281 pmap_pdpe_to_pde(pdp_entry_t *pdpe, vm_offset_t va) 282 { 283 pd_entry_t *pde; 284 285 pde = (pd_entry_t *)PHYS_TO_DMAP(*pdpe & VPTE_FRAME); 286 return (&pde[pmap_pde_index(va)]); 287 } 288 289 /* Return a pointer to the PD slot that corresponds to a VA */ 290 static __inline pd_entry_t * 291 pmap_pde(pmap_t pmap, vm_offset_t va) 292 { 293 pdp_entry_t *pdpe; 294 295 pdpe = pmap_pdpe(pmap, va); 296 if (pdpe == NULL || (*pdpe & VPTE_V) == 0) 297 return NULL; 298 return (pmap_pdpe_to_pde(pdpe, va)); 299 } 300 301 /* Return a pointer to the PT slot that corresponds to a VA */ 302 static __inline pt_entry_t * 303 pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va) 304 { 305 pt_entry_t *pte; 306 307 pte = (pt_entry_t *)PHYS_TO_DMAP(*pde & VPTE_FRAME); 308 return (&pte[pmap_pte_index(va)]); 309 } 310 311 /* Return a pointer to the PT slot that corresponds to a VA */ 312 static __inline pt_entry_t * 313 pmap_pte(pmap_t pmap, vm_offset_t va) 314 { 315 pd_entry_t *pde; 316 317 pde = pmap_pde(pmap, va); 318 if (pde == NULL || (*pde & VPTE_V) == 0) 319 return NULL; 320 if ((*pde & VPTE_PS) != 0) /* compat with i386 pmap_pte() */ 321 return ((pt_entry_t *)pde); 322 return (pmap_pde_to_pte(pde, va)); 323 } 324 325 326 #if JGV 327 PMAP_INLINE pt_entry_t * 328 vtopte(vm_offset_t va) 329 { 330 uint64_t mask = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + 331 NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1); 332 333 return (PTmap + ((va >> PAGE_SHIFT) & mask)); 334 } 335 336 static __inline pd_entry_t * 337 vtopde(vm_offset_t va) 338 { 339 uint64_t mask = ((1ul << (NPDEPGSHIFT + NPDPEPGSHIFT + 
340 NPML4EPGSHIFT)) - 1); 341 342 return (PDmap + ((va >> PDRSHIFT) & mask)); 343 } 344 #else 345 static PMAP_INLINE pt_entry_t * 346 vtopte(vm_offset_t va) 347 { 348 pt_entry_t *x; 349 x = pmap_pte(&kernel_pmap, va); 350 assert(x != NULL); 351 return x; 352 } 353 354 static __inline pd_entry_t * 355 vtopde(vm_offset_t va) 356 { 357 pd_entry_t *x; 358 x = pmap_pde(&kernel_pmap, va); 359 assert(x != NULL); 360 return x; 361 } 362 #endif 363 364 static uint64_t 365 allocpages(vm_paddr_t *firstaddr, int n) 366 { 367 uint64_t ret; 368 369 ret = *firstaddr; 370 #if JGV 371 bzero((void *)ret, n * PAGE_SIZE); 372 #endif 373 *firstaddr += n * PAGE_SIZE; 374 return (ret); 375 } 376 377 static void 378 create_dmap_vmm(vm_paddr_t *firstaddr) 379 { 380 void *stack_addr; 381 int pml4_stack_index; 382 int pdp_stack_index; 383 int pd_stack_index; 384 long i,j; 385 int regs[4]; 386 int amd_feature; 387 388 uint64_t KPDP_DMAP_phys = allocpages(firstaddr, NDMPML4E); 389 uint64_t KPDP_VSTACK_phys = allocpages(firstaddr, 1); 390 uint64_t KPD_VSTACK_phys = allocpages(firstaddr, 1); 391 392 pml4_entry_t *KPML4virt = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); 393 pdp_entry_t *KPDP_DMAP_virt = (pdp_entry_t *)PHYS_TO_DMAP(KPDP_DMAP_phys); 394 pdp_entry_t *KPDP_VSTACK_virt = (pdp_entry_t *)PHYS_TO_DMAP(KPDP_VSTACK_phys); 395 pd_entry_t *KPD_VSTACK_virt = (pd_entry_t *)PHYS_TO_DMAP(KPD_VSTACK_phys); 396 397 bzero(KPDP_DMAP_virt, NDMPML4E * PAGE_SIZE); 398 bzero(KPDP_VSTACK_virt, 1 * PAGE_SIZE); 399 bzero(KPD_VSTACK_virt, 1 * PAGE_SIZE); 400 401 do_cpuid(0x80000001, regs); 402 amd_feature = regs[3]; 403 404 /* Build the mappings for the first 512GB */ 405 if (amd_feature & AMDID_PAGE1GB) { 406 /* In pages of 1 GB, if supported */ 407 for (i = 0; i < NPDPEPG; i++) { 408 KPDP_DMAP_virt[i] = ((uint64_t)i << PDPSHIFT); 409 KPDP_DMAP_virt[i] |= VPTE_RW | VPTE_V | VPTE_PS | VPTE_U; 410 } 411 } else { 412 /* In page of 2MB, otherwise */ 413 for (i = 0; i < NPDPEPG; i++) { 414 uint64_t KPD_DMAP_phys = allocpages(firstaddr, 1); 415 pd_entry_t *KPD_DMAP_virt = (pd_entry_t *)PHYS_TO_DMAP(KPD_DMAP_phys); 416 417 bzero(KPD_DMAP_virt, PAGE_SIZE); 418 419 KPDP_DMAP_virt[i] = KPD_DMAP_phys; 420 KPDP_DMAP_virt[i] |= VPTE_RW | VPTE_V | VPTE_U; 421 422 /* For each PD, we have to allocate NPTEPG PT */ 423 for (j = 0; j < NPTEPG; j++) { 424 KPD_DMAP_virt[j] = (i << PDPSHIFT) | (j << PDRSHIFT); 425 KPD_DMAP_virt[j] |= VPTE_RW | VPTE_V | VPTE_PS | VPTE_U; 426 } 427 } 428 } 429 430 /* DMAP for the first 512G */ 431 KPML4virt[0] = KPDP_DMAP_phys; 432 KPML4virt[0] |= VPTE_RW | VPTE_V | VPTE_U; 433 434 /* create a 2 MB map of the new stack */ 435 pml4_stack_index = (uint64_t)&stack_addr >> PML4SHIFT; 436 KPML4virt[pml4_stack_index] = KPDP_VSTACK_phys; 437 KPML4virt[pml4_stack_index] |= VPTE_RW | VPTE_V | VPTE_U; 438 439 pdp_stack_index = ((uint64_t)&stack_addr & PML4MASK) >> PDPSHIFT; 440 KPDP_VSTACK_virt[pdp_stack_index] = KPD_VSTACK_phys; 441 KPDP_VSTACK_virt[pdp_stack_index] |= VPTE_RW | VPTE_V | VPTE_U; 442 443 pd_stack_index = ((uint64_t)&stack_addr & PDPMASK) >> PDRSHIFT; 444 KPD_VSTACK_virt[pd_stack_index] = (uint64_t) vkernel_stack; 445 KPD_VSTACK_virt[pd_stack_index] |= VPTE_RW | VPTE_V | VPTE_U | VPTE_PS; 446 } 447 448 static void 449 create_pagetables(vm_paddr_t *firstaddr, int64_t ptov_offset) 450 { 451 int i; 452 pml4_entry_t *KPML4virt; 453 pdp_entry_t *KPDPvirt; 454 pd_entry_t *KPDvirt; 455 pt_entry_t *KPTvirt; 456 int kpml4i = pmap_pml4e_index(ptov_offset); 457 int kpdpi = pmap_pdpe_index(ptov_offset); 458 int kpdi = 
pmap_pde_index(ptov_offset); 459 460 /* 461 * Calculate NKPT - number of kernel page tables. We have to 462 * accomodoate prealloction of the vm_page_array, dump bitmap, 463 * MSGBUF_SIZE, and other stuff. Be generous. 464 * 465 * Maxmem is in pages. 466 */ 467 nkpt = (Maxmem * (sizeof(struct vm_page) * 2) + MSGBUF_SIZE) / NBPDR; 468 /* 469 * Allocate pages 470 */ 471 KPML4phys = allocpages(firstaddr, 1); 472 KPDPphys = allocpages(firstaddr, NKPML4E); 473 KPDphys = allocpages(firstaddr, NKPDPE); 474 KPTphys = allocpages(firstaddr, nkpt); 475 476 KPML4virt = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); 477 KPDPvirt = (pdp_entry_t *)PHYS_TO_DMAP(KPDPphys); 478 KPDvirt = (pd_entry_t *)PHYS_TO_DMAP(KPDphys); 479 KPTvirt = (pt_entry_t *)PHYS_TO_DMAP(KPTphys); 480 481 bzero(KPML4virt, 1 * PAGE_SIZE); 482 bzero(KPDPvirt, NKPML4E * PAGE_SIZE); 483 bzero(KPDvirt, NKPDPE * PAGE_SIZE); 484 bzero(KPTvirt, nkpt * PAGE_SIZE); 485 486 /* Now map the page tables at their location within PTmap */ 487 for (i = 0; i < nkpt; i++) { 488 KPDvirt[i + kpdi] = KPTphys + (i << PAGE_SHIFT); 489 KPDvirt[i + kpdi] |= VPTE_RW | VPTE_V | VPTE_U; 490 } 491 492 /* And connect up the PD to the PDP */ 493 for (i = 0; i < NKPDPE; i++) { 494 KPDPvirt[i + kpdpi] = KPDphys + (i << PAGE_SHIFT); 495 KPDPvirt[i + kpdpi] |= VPTE_RW | VPTE_V | VPTE_U; 496 } 497 498 /* And recursively map PML4 to itself in order to get PTmap */ 499 KPML4virt[PML4PML4I] = KPML4phys; 500 KPML4virt[PML4PML4I] |= VPTE_RW | VPTE_V | VPTE_U; 501 502 /* Connect the KVA slot up to the PML4 */ 503 KPML4virt[kpml4i] = KPDPphys; 504 KPML4virt[kpml4i] |= VPTE_RW | VPTE_V | VPTE_U; 505 } 506 507 /* 508 * Typically used to initialize a fictitious page by vm/device_pager.c 509 */ 510 void 511 pmap_page_init(struct vm_page *m) 512 { 513 vm_page_init(m); 514 TAILQ_INIT(&m->md.pv_list); 515 } 516 517 /* 518 * Bootstrap the system enough to run with virtual memory. 519 * 520 * On the i386 this is called after mapping has already been enabled 521 * and just syncs the pmap module with what has already been done. 522 * [We can't call it easily with mapping off since the kernel is not 523 * mapped with PA == VA, hence we would have to relocate every address 524 * from the linked base (virtual) address "KERNBASE" to the actual 525 * (physical) address starting relative to 0] 526 */ 527 void 528 pmap_bootstrap(vm_paddr_t *firstaddr, int64_t ptov_offset) 529 { 530 vm_offset_t va; 531 pt_entry_t *pte; 532 533 /* 534 * Create an initial set of page tables to run the kernel in. 535 */ 536 create_pagetables(firstaddr, ptov_offset); 537 538 /* Create the DMAP for the VMM */ 539 if (vmm_enabled) { 540 create_dmap_vmm(firstaddr); 541 } 542 543 virtual_start = KvaStart; 544 virtual_end = KvaEnd; 545 546 /* 547 * Initialize protection array. 548 */ 549 i386_protection_init(); 550 551 /* 552 * The kernel's pmap is statically allocated so we don't have to use 553 * pmap_create, which is unlikely to work correctly at this part of 554 * the boot sequence (XXX and which no longer exists). 555 * 556 * The kernel_pmap's pm_pteobj is used only for locking and not 557 * for mmu pages. 
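 *
 * (Illustrative note, not from the original source: pm_pml4 below is a
 *  PHYS_TO_DMAP() alias of KPML4phys rather than a separately mapped
 *  KVA page, and pm_active is set to all ones so the kernel pmap is
 *  never a candidate for deactivation.)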
558 */ 559 kernel_pmap.pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); 560 kernel_pmap.pm_count = 1; 561 /* don't allow deactivation */ 562 CPUMASK_ASSALLONES(kernel_pmap.pm_active); 563 kernel_pmap.pm_pteobj = NULL; /* see pmap_init */ 564 TAILQ_INIT(&kernel_pmap.pm_pvlist); 565 TAILQ_INIT(&kernel_pmap.pm_pvlist_free); 566 lwkt_token_init(&kernel_pmap.pm_token, "kpmap_tok"); 567 spin_init(&kernel_pmap.pm_spin, "pmapbootstrap"); 568 569 /* 570 * Reserve some special page table entries/VA space for temporary 571 * mapping of pages. 572 */ 573 #define SYSMAP(c, p, v, n) \ 574 v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); 575 576 va = virtual_start; 577 pte = pmap_pte(&kernel_pmap, va); 578 /* 579 * CMAP1/CMAP2 are used for zeroing and copying pages. 580 */ 581 SYSMAP(caddr_t, CMAP1, CADDR1, 1) 582 583 #if JGV 584 /* 585 * Crashdump maps. 586 */ 587 SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS); 588 #endif 589 590 /* 591 * ptvmmap is used for reading arbitrary physical pages via 592 * /dev/mem. 593 */ 594 SYSMAP(caddr_t, ptmmap, ptvmmap, 1) 595 596 /* 597 * msgbufp is used to map the system message buffer. 598 * XXX msgbufmap is not used. 599 */ 600 SYSMAP(struct msgbuf *, msgbufmap, msgbufp, 601 atop(round_page(MSGBUF_SIZE))) 602 603 virtual_start = va; 604 605 *CMAP1 = 0; 606 /* Not ready to do an invltlb yet for VMM*/ 607 if (!vmm_enabled) 608 cpu_invltlb(); 609 610 } 611 612 /* 613 * Initialize the pmap module. 614 * Called by vm_init, to initialize any structures that the pmap 615 * system needs to map virtual memory. 616 * pmap_init has been enhanced to support in a fairly consistant 617 * way, discontiguous physical memory. 618 */ 619 void 620 pmap_init(void) 621 { 622 int i; 623 int initial_pvs; 624 625 /* 626 * object for kernel page table pages 627 */ 628 /* JG I think the number can be arbitrary */ 629 kptobj = vm_object_allocate(OBJT_DEFAULT, 5); 630 kernel_pmap.pm_pteobj = kptobj; 631 632 /* 633 * Allocate memory for random pmap data structures. Includes the 634 * pv_head_table. 635 */ 636 for(i = 0; i < vm_page_array_size; i++) { 637 vm_page_t m; 638 639 m = &vm_page_array[i]; 640 TAILQ_INIT(&m->md.pv_list); 641 m->md.pv_list_count = 0; 642 } 643 644 /* 645 * init the pv free list 646 */ 647 initial_pvs = vm_page_array_size; 648 if (initial_pvs < MINPV) 649 initial_pvs = MINPV; 650 pvzone = &pvzone_store; 651 pvinit = (struct pv_entry *) kmem_alloc(&kernel_map, 652 initial_pvs * sizeof (struct pv_entry)); 653 zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit, 654 initial_pvs); 655 656 /* 657 * Now it is safe to enable pv_table recording. 658 */ 659 pmap_initialized = TRUE; 660 } 661 662 /* 663 * Initialize the address space (zone) for the pv_entries. Set a 664 * high water mark so that the system can recover from excessive 665 * numbers of pv entries. 666 */ 667 void 668 pmap_init2(void) 669 { 670 int shpgperproc = PMAP_SHPGPERPROC; 671 672 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); 673 pv_entry_max = shpgperproc * maxproc + vm_page_array_size; 674 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); 675 pv_entry_high_water = 9 * (pv_entry_max / 10); 676 zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1); 677 } 678 679 680 /*************************************************** 681 * Low level helper routines..... 682 ***************************************************/ 683 684 /* 685 * The modification bit is not tracked for any pages in this range. 
XXX 686 * such pages in this maps should always use pmap_k*() functions and not 687 * be managed anyhow. 688 * 689 * XXX User and kernel address spaces are independant for virtual kernels, 690 * this function only applies to the kernel pmap. 691 */ 692 static int 693 pmap_track_modified(pmap_t pmap, vm_offset_t va) 694 { 695 if (pmap != &kernel_pmap) 696 return 1; 697 if ((va < clean_sva) || (va >= clean_eva)) 698 return 1; 699 else 700 return 0; 701 } 702 703 /* 704 * Extract the physical page address associated with the map/VA pair. 705 * 706 * No requirements. 707 */ 708 vm_paddr_t 709 pmap_extract(pmap_t pmap, vm_offset_t va) 710 { 711 vm_paddr_t rtval; 712 pt_entry_t *pte; 713 pd_entry_t pde, *pdep; 714 715 lwkt_gettoken(&vm_token); 716 rtval = 0; 717 pdep = pmap_pde(pmap, va); 718 if (pdep != NULL) { 719 pde = *pdep; 720 if (pde) { 721 if ((pde & VPTE_PS) != 0) { 722 /* JGV */ 723 rtval = (pde & PG_PS_FRAME) | (va & PDRMASK); 724 } else { 725 pte = pmap_pde_to_pte(pdep, va); 726 rtval = (*pte & VPTE_FRAME) | (va & PAGE_MASK); 727 } 728 } 729 } 730 lwkt_reltoken(&vm_token); 731 return rtval; 732 } 733 734 /* 735 * Similar to extract but checks protections, SMP-friendly short-cut for 736 * vm_fault_page[_quick](). 737 */ 738 vm_page_t 739 pmap_fault_page_quick(pmap_t pmap __unused, vm_offset_t vaddr __unused, 740 vm_prot_t prot __unused) 741 { 742 return(NULL); 743 } 744 745 /* 746 * Routine: pmap_kextract 747 * Function: 748 * Extract the physical page address associated 749 * kernel virtual address. 750 */ 751 vm_paddr_t 752 pmap_kextract(vm_offset_t va) 753 { 754 pd_entry_t pde; 755 vm_paddr_t pa; 756 757 KKASSERT(va >= KvaStart && va < KvaEnd); 758 759 /* 760 * The DMAP region is not included in [KvaStart, KvaEnd) 761 */ 762 #if 0 763 if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { 764 pa = DMAP_TO_PHYS(va); 765 } else { 766 #endif 767 pde = *vtopde(va); 768 if (pde & VPTE_PS) { 769 /* JGV */ 770 pa = (pde & PG_PS_FRAME) | (va & PDRMASK); 771 } else { 772 /* 773 * Beware of a concurrent promotion that changes the 774 * PDE at this point! For example, vtopte() must not 775 * be used to access the PTE because it would use the 776 * new PDE. It is, however, safe to use the old PDE 777 * because the page table page is preserved by the 778 * promotion. 779 */ 780 pa = *pmap_pde_to_pte(&pde, va); 781 pa = (pa & VPTE_FRAME) | (va & PAGE_MASK); 782 } 783 #if 0 784 } 785 #endif 786 return pa; 787 } 788 789 /*************************************************** 790 * Low level mapping routines..... 791 ***************************************************/ 792 793 /* 794 * Enter a mapping into kernel_pmap. Mappings created in this fashion 795 * are not managed. Mappings must be immediately accessible on all cpus. 796 * 797 * Call pmap_inval_pte() to invalidate the virtual pte and clean out the 798 * real pmap and handle related races before storing the new vpte. The 799 * new semantics for kenter require use to do an UNCONDITIONAL invalidation, 800 * because the entry may have previously been cleared without an invalidation. 
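 *
 * Illustrative sketch (not part of the original source), assuming a
 * hypothetical caller that needs a temporary kernel mapping of a
 * vm_page_t 'm' at a reserved KVA address 'va':
 *
 *	pmap_kenter(va, VM_PAGE_TO_PHYS(m));
 *	...access the page through va...
 *	pmap_kremove(va);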
801 */ 802 void 803 pmap_kenter(vm_offset_t va, vm_paddr_t pa) 804 { 805 pt_entry_t *pte; 806 pt_entry_t npte; 807 808 KKASSERT(va >= KvaStart && va < KvaEnd); 809 npte = pa | VPTE_RW | VPTE_V | VPTE_U; 810 pte = vtopte(va); 811 812 #if 1 813 *pte = 0; 814 pmap_inval_pte(pte, &kernel_pmap, va); 815 #else 816 if (*pte & VPTE_V) 817 pmap_inval_pte(pte, &kernel_pmap, va); 818 #endif 819 *pte = npte; 820 } 821 822 /* 823 * Enter an unmanaged KVA mapping for the private use of the current 824 * cpu only. 825 * 826 * It is illegal for the mapping to be accessed by other cpus without 827 * proper invalidation. 828 */ 829 int 830 pmap_kenter_quick(vm_offset_t va, vm_paddr_t pa) 831 { 832 pt_entry_t *ptep; 833 pt_entry_t npte; 834 int res; 835 836 KKASSERT(va >= KvaStart && va < KvaEnd); 837 838 npte = (vpte_t)pa | VPTE_RW | VPTE_V | VPTE_U; 839 ptep = vtopte(va); 840 #if 1 841 res = 1; 842 #else 843 /* FUTURE */ 844 res = (*ptep != 0); 845 #endif 846 847 if (*ptep & VPTE_V) 848 pmap_inval_pte_quick(ptep, &kernel_pmap, va); 849 *ptep = npte; 850 851 return res; 852 } 853 854 int 855 pmap_kenter_noinval(vm_offset_t va, vm_paddr_t pa) 856 { 857 pt_entry_t *ptep; 858 pt_entry_t npte; 859 int res; 860 861 KKASSERT(va >= KvaStart && va < KvaEnd); 862 863 npte = (vpte_t)pa | VPTE_RW | VPTE_V | VPTE_U; 864 ptep = vtopte(va); 865 #if 1 866 res = 1; 867 #else 868 /* FUTURE */ 869 res = (*ptep != 0); 870 #endif 871 872 *ptep = npte; 873 874 return res; 875 } 876 877 /* 878 * Remove an unmanaged mapping created with pmap_kenter*(). 879 */ 880 void 881 pmap_kremove(vm_offset_t va) 882 { 883 pt_entry_t *pte; 884 885 KKASSERT(va >= KvaStart && va < KvaEnd); 886 887 pte = vtopte(va); 888 *pte = 0; 889 pmap_inval_pte(pte, &kernel_pmap, va); 890 } 891 892 /* 893 * Remove an unmanaged mapping created with pmap_kenter*() but synchronize 894 * only with this cpu. 895 * 896 * Unfortunately because we optimize new entries by testing VPTE_V later 897 * on, we actually still have to synchronize with all the cpus. XXX maybe 898 * store a junk value and test against 0 in the other places instead? 899 */ 900 void 901 pmap_kremove_quick(vm_offset_t va) 902 { 903 pt_entry_t *pte; 904 905 KKASSERT(va >= KvaStart && va < KvaEnd); 906 907 pte = vtopte(va); 908 *pte = 0; 909 pmap_inval_pte(pte, &kernel_pmap, va); /* NOT _quick */ 910 } 911 912 void 913 pmap_kremove_noinval(vm_offset_t va) 914 { 915 pt_entry_t *pte; 916 917 KKASSERT(va >= KvaStart && va < KvaEnd); 918 919 pte = vtopte(va); 920 *pte = 0; 921 } 922 923 /* 924 * Used to map a range of physical addresses into kernel 925 * virtual address space. 926 * 927 * For now, VM is already on, we only need to map the 928 * specified memory. 929 */ 930 vm_offset_t 931 pmap_map(vm_offset_t *virtp, vm_paddr_t start, vm_paddr_t end, int prot) 932 { 933 return PHYS_TO_DMAP(start); 934 } 935 936 /* 937 * Map a set of unmanaged VM pages into KVM. 938 */ 939 void 940 pmap_qenter(vm_offset_t va, vm_page_t *m, int count) 941 { 942 vm_offset_t end_va; 943 944 end_va = va + count * PAGE_SIZE; 945 KKASSERT(va >= KvaStart && end_va < KvaEnd); 946 947 while (va < end_va) { 948 pt_entry_t *pte; 949 950 pte = vtopte(va); 951 *pte = 0; 952 pmap_inval_pte(pte, &kernel_pmap, va); 953 *pte = VM_PAGE_TO_PHYS(*m) | VPTE_RW | VPTE_V | VPTE_U; 954 va += PAGE_SIZE; 955 m++; 956 } 957 } 958 959 /* 960 * Undo the effects of pmap_qenter*(). 
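 *
 * Illustrative pairing (not part of the original source), where 'pages'
 * and 'npages' are hypothetical caller variables:
 *
 *	pmap_qenter(va, pages, npages);
 *	...use the npages * PAGE_SIZE window starting at va...
 *	pmap_qremove(va, npages);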
961 */ 962 void 963 pmap_qremove(vm_offset_t va, int count) 964 { 965 vm_offset_t end_va; 966 967 end_va = va + count * PAGE_SIZE; 968 KKASSERT(va >= KvaStart && end_va < KvaEnd); 969 970 while (va < end_va) { 971 pt_entry_t *pte; 972 973 pte = vtopte(va); 974 atomic_swap_long(pte, 0); 975 pmap_inval_pte(pte, &kernel_pmap, va); 976 va += PAGE_SIZE; 977 } 978 } 979 980 void 981 pmap_qremove_quick(vm_offset_t va, int count) 982 { 983 vm_offset_t end_va; 984 985 end_va = va + count * PAGE_SIZE; 986 KKASSERT(va >= KvaStart && end_va < KvaEnd); 987 988 while (va < end_va) { 989 pt_entry_t *pte; 990 991 pte = vtopte(va); 992 atomic_swap_long(pte, 0); 993 cpu_invlpg((void *)va); 994 va += PAGE_SIZE; 995 } 996 } 997 998 void 999 pmap_qremove_noinval(vm_offset_t va, int count) 1000 { 1001 vm_offset_t end_va; 1002 1003 end_va = va + count * PAGE_SIZE; 1004 KKASSERT(va >= KvaStart && end_va < KvaEnd); 1005 1006 while (va < end_va) { 1007 pt_entry_t *pte; 1008 1009 pte = vtopte(va); 1010 atomic_swap_long(pte, 0); 1011 va += PAGE_SIZE; 1012 } 1013 } 1014 1015 /* 1016 * This routine works like vm_page_lookup() but also blocks as long as the 1017 * page is busy. This routine does not busy the page it returns. 1018 * 1019 * Unless the caller is managing objects whos pages are in a known state, 1020 * the call should be made with a critical section held so the page's object 1021 * association remains valid on return. 1022 */ 1023 static vm_page_t 1024 pmap_page_lookup(vm_object_t object, vm_pindex_t pindex) 1025 { 1026 vm_page_t m; 1027 1028 ASSERT_LWKT_TOKEN_HELD(vm_object_token(object)); 1029 m = vm_page_lookup_busy_wait(object, pindex, FALSE, "pplookp"); 1030 1031 return(m); 1032 } 1033 1034 /* 1035 * Create a new thread and optionally associate it with a (new) process. 1036 * NOTE! the new thread's cpu may not equal the current cpu. 1037 */ 1038 void 1039 pmap_init_thread(thread_t td) 1040 { 1041 /* enforce pcb placement */ 1042 td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_size) - 1; 1043 td->td_savefpu = &td->td_pcb->pcb_save; 1044 td->td_sp = (char *)td->td_pcb - 16; /* JG is -16 needed on x86_64? */ 1045 } 1046 1047 /* 1048 * This routine directly affects the fork perf for a process. 1049 */ 1050 void 1051 pmap_init_proc(struct proc *p) 1052 { 1053 } 1054 1055 /*************************************************** 1056 * Page table page management routines..... 1057 ***************************************************/ 1058 1059 static __inline int pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, 1060 vm_page_t m); 1061 1062 /* 1063 * This routine unholds page table pages, and if the hold count 1064 * drops to zero, then it decrements the wire count. 1065 * 1066 * We must recheck that this is the last hold reference after busy-sleeping 1067 * on the page. 1068 */ 1069 static int 1070 _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m) 1071 { 1072 vm_page_busy_wait(m, FALSE, "pmuwpt"); 1073 KASSERT(m->queue == PQ_NONE, 1074 ("_pmap_unwire_pte_hold: %p->queue != PQ_NONE", m)); 1075 1076 if (m->hold_count == 1) { 1077 /* 1078 * Unmap the page table page. 
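 *
 * (Illustrative summary, not from the original source: the pindex
 *  ranges tested below select the level being torn down --
 *  [0, NUPDE) is a page table page, [NUPDE, NUPDE + NUPDPE) is a
 *  page directory page, and anything above that is a PDP page.)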
1079 */ 1080 //abort(); /* JG */ 1081 /* pmap_inval_add(info, pmap, -1); */ 1082 1083 if (m->pindex >= (NUPDE + NUPDPE)) { 1084 /* PDP page */ 1085 pml4_entry_t *pml4; 1086 pml4 = pmap_pml4e(pmap, va); 1087 *pml4 = 0; 1088 } else if (m->pindex >= NUPDE) { 1089 /* PD page */ 1090 pdp_entry_t *pdp; 1091 pdp = pmap_pdpe(pmap, va); 1092 *pdp = 0; 1093 } else { 1094 /* PT page */ 1095 pd_entry_t *pd; 1096 pd = pmap_pde(pmap, va); 1097 *pd = 0; 1098 } 1099 1100 KKASSERT(pmap->pm_stats.resident_count > 0); 1101 --pmap->pm_stats.resident_count; 1102 1103 if (pmap->pm_ptphint == m) 1104 pmap->pm_ptphint = NULL; 1105 1106 if (m->pindex < NUPDE) { 1107 /* We just released a PT, unhold the matching PD */ 1108 vm_page_t pdpg; 1109 1110 pdpg = PHYS_TO_VM_PAGE(*pmap_pdpe(pmap, va) & VPTE_FRAME); 1111 pmap_unwire_pte_hold(pmap, va, pdpg); 1112 } 1113 if (m->pindex >= NUPDE && m->pindex < (NUPDE + NUPDPE)) { 1114 /* We just released a PD, unhold the matching PDP */ 1115 vm_page_t pdppg; 1116 1117 pdppg = PHYS_TO_VM_PAGE(*pmap_pml4e(pmap, va) & VPTE_FRAME); 1118 pmap_unwire_pte_hold(pmap, va, pdppg); 1119 } 1120 1121 /* 1122 * This was our last hold, the page had better be unwired 1123 * after we decrement wire_count. 1124 * 1125 * FUTURE NOTE: shared page directory page could result in 1126 * multiple wire counts. 1127 */ 1128 vm_page_unhold(m); 1129 --m->wire_count; 1130 KKASSERT(m->wire_count == 0); 1131 atomic_add_int(&vmstats.v_wire_count, -1); 1132 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 1133 vm_page_flash(m); 1134 vm_page_free_zero(m); 1135 return 1; 1136 } else { 1137 KKASSERT(m->hold_count > 1); 1138 vm_page_unhold(m); 1139 vm_page_wakeup(m); 1140 return 0; 1141 } 1142 } 1143 1144 static __inline int 1145 pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m) 1146 { 1147 KKASSERT(m->hold_count > 0); 1148 if (m->hold_count > 1) { 1149 vm_page_unhold(m); 1150 return 0; 1151 } else { 1152 return _pmap_unwire_pte_hold(pmap, va, m); 1153 } 1154 } 1155 1156 /* 1157 * After removing a page table entry, this routine is used to 1158 * conditionally free the page, and manage the hold/wire counts. 1159 */ 1160 static int 1161 pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) 1162 { 1163 /* JG Use FreeBSD/amd64 or FreeBSD/i386 ptepde approaches? */ 1164 vm_pindex_t ptepindex; 1165 1166 ASSERT_LWKT_TOKEN_HELD(vm_object_token(pmap->pm_pteobj)); 1167 1168 if (mpte == NULL) { 1169 /* 1170 * page table pages in the kernel_pmap are not managed. 1171 */ 1172 if (pmap == &kernel_pmap) 1173 return(0); 1174 ptepindex = pmap_pde_pindex(va); 1175 if (pmap->pm_ptphint && 1176 (pmap->pm_ptphint->pindex == ptepindex)) { 1177 mpte = pmap->pm_ptphint; 1178 } else { 1179 mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex); 1180 pmap->pm_ptphint = mpte; 1181 vm_page_wakeup(mpte); 1182 } 1183 } 1184 1185 return pmap_unwire_pte_hold(pmap, va, mpte); 1186 } 1187 1188 /* 1189 * Initialize pmap0/vmspace0 . Since process 0 never enters user mode we 1190 * just dummy it up so it works well enough for fork(). 1191 * 1192 * In DragonFly, process pmaps may only be used to manipulate user address 1193 * space, never kernel address space. 1194 */ 1195 void 1196 pmap_pinit0(struct pmap *pmap) 1197 { 1198 pmap_pinit(pmap); 1199 } 1200 1201 /* 1202 * Initialize a preallocated and zeroed pmap structure, 1203 * such as one in a vmspace structure. 
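 *
 * Illustrative lifecycle sketch (not part of the original source):
 *
 *	pmap_pinit(pmap);	- set up the pml4, pte object, directory page
 *	pmap_pinit2(pmap);	- add the pmap to the master pmap list
 *	...
 *	pmap_release(pmap);	- release the remaining page table pages
 *	pmap_puninit(pmap);	- free the pml4 KVA and the cached directory page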
1204 */ 1205 void 1206 pmap_pinit(struct pmap *pmap) 1207 { 1208 vm_page_t ptdpg; 1209 1210 /* 1211 * No need to allocate page table space yet but we do need a valid 1212 * page directory table. 1213 */ 1214 if (pmap->pm_pml4 == NULL) { 1215 pmap->pm_pml4 = 1216 (pml4_entry_t *)kmem_alloc_pageable(&kernel_map, PAGE_SIZE); 1217 } 1218 1219 /* 1220 * Allocate an object for the ptes 1221 */ 1222 if (pmap->pm_pteobj == NULL) 1223 pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, NUPDE + NUPDPE + PML4PML4I + 1); 1224 1225 /* 1226 * Allocate the page directory page, unless we already have 1227 * one cached. If we used the cached page the wire_count will 1228 * already be set appropriately. 1229 */ 1230 if ((ptdpg = pmap->pm_pdirm) == NULL) { 1231 ptdpg = vm_page_grab(pmap->pm_pteobj, 1232 NUPDE + NUPDPE + PML4PML4I, 1233 VM_ALLOC_NORMAL | VM_ALLOC_RETRY | 1234 VM_ALLOC_ZERO); 1235 pmap->pm_pdirm = ptdpg; 1236 vm_page_flag_clear(ptdpg, PG_MAPPED); 1237 vm_page_wire(ptdpg); 1238 vm_page_wakeup(ptdpg); 1239 pmap_kenter((vm_offset_t)pmap->pm_pml4, VM_PAGE_TO_PHYS(ptdpg)); 1240 } 1241 pmap->pm_count = 1; 1242 CPUMASK_ASSZERO(pmap->pm_active); 1243 pmap->pm_ptphint = NULL; 1244 TAILQ_INIT(&pmap->pm_pvlist); 1245 TAILQ_INIT(&pmap->pm_pvlist_free); 1246 spin_init(&pmap->pm_spin, "pmapinit"); 1247 lwkt_token_init(&pmap->pm_token, "pmap_tok"); 1248 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1249 pmap->pm_stats.resident_count = 1; 1250 } 1251 1252 /* 1253 * Clean up a pmap structure so it can be physically freed. This routine 1254 * is called by the vmspace dtor function. A great deal of pmap data is 1255 * left passively mapped to improve vmspace management so we have a bit 1256 * of cleanup work to do here. 1257 * 1258 * No requirements. 1259 */ 1260 void 1261 pmap_puninit(pmap_t pmap) 1262 { 1263 vm_page_t p; 1264 1265 KKASSERT(CPUMASK_TESTZERO(pmap->pm_active)); 1266 if ((p = pmap->pm_pdirm) != NULL) { 1267 KKASSERT(pmap->pm_pml4 != NULL); 1268 pmap_kremove((vm_offset_t)pmap->pm_pml4); 1269 vm_page_busy_wait(p, FALSE, "pgpun"); 1270 p->wire_count--; 1271 atomic_add_int(&vmstats.v_wire_count, -1); 1272 vm_page_free_zero(p); 1273 pmap->pm_pdirm = NULL; 1274 } 1275 if (pmap->pm_pml4) { 1276 kmem_free(&kernel_map, (vm_offset_t)pmap->pm_pml4, PAGE_SIZE); 1277 pmap->pm_pml4 = NULL; 1278 } 1279 if (pmap->pm_pteobj) { 1280 vm_object_deallocate(pmap->pm_pteobj); 1281 pmap->pm_pteobj = NULL; 1282 } 1283 } 1284 1285 /* 1286 * Wire in kernel global address entries. To avoid a race condition 1287 * between pmap initialization and pmap_growkernel, this procedure 1288 * adds the pmap to the master list (which growkernel scans to update), 1289 * then copies the template. 1290 * 1291 * In a virtual kernel there are no kernel global address entries. 1292 * 1293 * No requirements. 1294 */ 1295 void 1296 pmap_pinit2(struct pmap *pmap) 1297 { 1298 spin_lock(&pmap_spin); 1299 TAILQ_INSERT_TAIL(&pmap_list, pmap, pm_pmnode); 1300 spin_unlock(&pmap_spin); 1301 } 1302 1303 /* 1304 * Attempt to release and free a vm_page in a pmap. Returns 1 on success, 1305 * 0 on failure (if the procedure had to sleep). 1306 * 1307 * When asked to remove the page directory page itself, we actually just 1308 * leave it cached so we do not have to incur the SMP inval overhead of 1309 * removing the kernel mapping. pmap_puninit() will take care of it. 1310 */ 1311 static int 1312 pmap_release_free_page(struct pmap *pmap, vm_page_t p) 1313 { 1314 /* 1315 * This code optimizes the case of freeing non-busy 1316 * page-table pages. 
Those pages are zero now, and 1317 * might as well be placed directly into the zero queue. 1318 */ 1319 if (vm_page_busy_try(p, FALSE)) { 1320 vm_page_sleep_busy(p, FALSE, "pmaprl"); 1321 return 0; 1322 } 1323 1324 /* 1325 * Remove the page table page from the processes address space. 1326 */ 1327 if (p->pindex == NUPDE + NUPDPE + PML4PML4I) { 1328 /* 1329 * We are the pml4 table itself. 1330 */ 1331 /* XXX anything to do here? */ 1332 } else if (p->pindex >= (NUPDE + NUPDPE)) { 1333 /* 1334 * We are a PDP page. 1335 * We look for the PML4 entry that points to us. 1336 */ 1337 vm_page_t m4 = vm_page_lookup(pmap->pm_pteobj, NUPDE + NUPDPE + PML4PML4I); 1338 KKASSERT(m4 != NULL); 1339 pml4_entry_t *pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m4)); 1340 int idx = (p->pindex - (NUPDE + NUPDPE)) % NPML4EPG; 1341 KKASSERT(pml4[idx] != 0); 1342 pml4[idx] = 0; 1343 m4->hold_count--; 1344 /* JG What about wire_count? */ 1345 } else if (p->pindex >= NUPDE) { 1346 /* 1347 * We are a PD page. 1348 * We look for the PDP entry that points to us. 1349 */ 1350 vm_page_t m3 = vm_page_lookup(pmap->pm_pteobj, NUPDE + NUPDPE + (p->pindex - NUPDE) / NPDPEPG); 1351 KKASSERT(m3 != NULL); 1352 pdp_entry_t *pdp = (pdp_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m3)); 1353 int idx = (p->pindex - NUPDE) % NPDPEPG; 1354 KKASSERT(pdp[idx] != 0); 1355 pdp[idx] = 0; 1356 m3->hold_count--; 1357 /* JG What about wire_count? */ 1358 } else { 1359 /* We are a PT page. 1360 * We look for the PD entry that points to us. 1361 */ 1362 vm_page_t m2 = vm_page_lookup(pmap->pm_pteobj, NUPDE + p->pindex / NPDEPG); 1363 KKASSERT(m2 != NULL); 1364 pd_entry_t *pd = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m2)); 1365 int idx = p->pindex % NPDEPG; 1366 pd[idx] = 0; 1367 m2->hold_count--; 1368 /* JG What about wire_count? */ 1369 } 1370 KKASSERT(pmap->pm_stats.resident_count > 0); 1371 --pmap->pm_stats.resident_count; 1372 1373 if (p->hold_count) { 1374 panic("pmap_release: freeing held pt page " 1375 "pmap=%p pg=%p dmap=%p pi=%ld {%ld,%ld,%ld}", 1376 pmap, p, (void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(p)), 1377 p->pindex, NUPDE, NUPDPE, PML4PML4I); 1378 } 1379 if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex)) 1380 pmap->pm_ptphint = NULL; 1381 1382 /* 1383 * We leave the top-level page table page cached, wired, and mapped in 1384 * the pmap until the dtor function (pmap_puninit()) gets called. 1385 * However, still clean it up. 1386 */ 1387 if (p->pindex == NUPDE + NUPDPE + PML4PML4I) { 1388 bzero(pmap->pm_pml4, PAGE_SIZE); 1389 vm_page_wakeup(p); 1390 } else { 1391 abort(); 1392 p->wire_count--; 1393 atomic_add_int(&vmstats.v_wire_count, -1); 1394 /* JG eventually revert to using vm_page_free_zero() */ 1395 vm_page_free(p); 1396 } 1397 return 1; 1398 } 1399 1400 /* 1401 * this routine is called if the page table page is not 1402 * mapped correctly. 1403 */ 1404 static vm_page_t 1405 _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex) 1406 { 1407 vm_page_t m, pdppg, pdpg; 1408 1409 /* 1410 * Find or fabricate a new pagetable page. Handle allocation 1411 * races by checking m->valid. 1412 */ 1413 m = vm_page_grab(pmap->pm_pteobj, ptepindex, 1414 VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY); 1415 1416 KASSERT(m->queue == PQ_NONE, 1417 ("_pmap_allocpte: %p->queue != PQ_NONE", m)); 1418 1419 /* 1420 * Increment the hold count for the page we will be returning to 1421 * the caller. 
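 *
 * (Illustrative note, not from the original source: the hold count acts
 *  as the reference count taken by callers and by child page table
 *  pages, while the wire count taken just below keeps the page away
 *  from the pageout daemon; _pmap_unwire_pte_hold() undoes both.)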
1422 */ 1423 m->hold_count++; 1424 vm_page_wire(m); 1425 1426 /* 1427 * Map the pagetable page into the process address space, if 1428 * it isn't already there. 1429 */ 1430 ++pmap->pm_stats.resident_count; 1431 1432 if (ptepindex >= (NUPDE + NUPDPE)) { 1433 pml4_entry_t *pml4; 1434 vm_pindex_t pml4index; 1435 1436 /* Wire up a new PDP page */ 1437 pml4index = ptepindex - (NUPDE + NUPDPE); 1438 pml4 = &pmap->pm_pml4[pml4index]; 1439 *pml4 = VM_PAGE_TO_PHYS(m) | 1440 VPTE_RW | VPTE_V | VPTE_U | 1441 VPTE_A | VPTE_M; 1442 } else if (ptepindex >= NUPDE) { 1443 vm_pindex_t pml4index; 1444 vm_pindex_t pdpindex; 1445 pml4_entry_t *pml4; 1446 pdp_entry_t *pdp; 1447 1448 /* Wire up a new PD page */ 1449 pdpindex = ptepindex - NUPDE; 1450 pml4index = pdpindex >> NPML4EPGSHIFT; 1451 1452 pml4 = &pmap->pm_pml4[pml4index]; 1453 if ((*pml4 & VPTE_V) == 0) { 1454 /* Have to allocate a new PDP page, recurse */ 1455 if (_pmap_allocpte(pmap, NUPDE + NUPDPE + pml4index) 1456 == NULL) { 1457 --m->wire_count; 1458 vm_page_free(m); 1459 return (NULL); 1460 } 1461 } else { 1462 /* Add reference to the PDP page */ 1463 pdppg = PHYS_TO_VM_PAGE(*pml4 & VPTE_FRAME); 1464 pdppg->hold_count++; 1465 } 1466 pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & VPTE_FRAME); 1467 1468 /* Now find the pdp page */ 1469 pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; 1470 KKASSERT(*pdp == 0); /* JG DEBUG64 */ 1471 *pdp = VM_PAGE_TO_PHYS(m) | VPTE_RW | VPTE_V | VPTE_U | 1472 VPTE_A | VPTE_M; 1473 } else { 1474 vm_pindex_t pml4index; 1475 vm_pindex_t pdpindex; 1476 pml4_entry_t *pml4; 1477 pdp_entry_t *pdp; 1478 pd_entry_t *pd; 1479 1480 /* Wire up a new PT page */ 1481 pdpindex = ptepindex >> NPDPEPGSHIFT; 1482 pml4index = pdpindex >> NPML4EPGSHIFT; 1483 1484 /* First, find the pdp and check that its valid. */ 1485 pml4 = &pmap->pm_pml4[pml4index]; 1486 if ((*pml4 & VPTE_V) == 0) { 1487 /* We miss a PDP page. We ultimately need a PD page. 1488 * Recursively allocating a PD page will allocate 1489 * the missing PDP page and will also allocate 1490 * the PD page we need. 1491 */ 1492 /* Have to allocate a new PD page, recurse */ 1493 if (_pmap_allocpte(pmap, NUPDE + pdpindex) 1494 == NULL) { 1495 --m->wire_count; 1496 vm_page_free(m); 1497 return (NULL); 1498 } 1499 pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & VPTE_FRAME); 1500 pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; 1501 } else { 1502 pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & VPTE_FRAME); 1503 pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; 1504 if ((*pdp & VPTE_V) == 0) { 1505 /* Have to allocate a new PD page, recurse */ 1506 if (_pmap_allocpte(pmap, NUPDE + pdpindex) 1507 == NULL) { 1508 --m->wire_count; 1509 vm_page_free(m); 1510 return (NULL); 1511 } 1512 } else { 1513 /* Add reference to the PD page */ 1514 pdpg = PHYS_TO_VM_PAGE(*pdp & VPTE_FRAME); 1515 pdpg->hold_count++; 1516 } 1517 } 1518 pd = (pd_entry_t *)PHYS_TO_DMAP(*pdp & VPTE_FRAME); 1519 1520 /* Now we know where the page directory page is */ 1521 pd = &pd[ptepindex & ((1ul << NPDEPGSHIFT) - 1)]; 1522 KKASSERT(*pd == 0); /* JG DEBUG64 */ 1523 *pd = VM_PAGE_TO_PHYS(m) | VPTE_RW | VPTE_V | VPTE_U | 1524 VPTE_A | VPTE_M; 1525 } 1526 1527 /* 1528 * Set the page table hint 1529 */ 1530 pmap->pm_ptphint = m; 1531 vm_page_flag_set(m, PG_MAPPED); 1532 vm_page_wakeup(m); 1533 1534 return m; 1535 } 1536 1537 /* 1538 * Determine the page table page required to access the VA in the pmap 1539 * and allocate it if necessary. Return a held vm_page_t for the page. 1540 * 1541 * Only used with user pmaps. 
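 *
 * Illustrative sketch (not part of the original source), assuming a
 * hypothetical caller about to establish a user mapping at 'va':
 *
 *	mpte = pmap_allocpte(pmap, va);		- page table page, held
 *	pte = pmap_pte(pmap, va);		- expected to be non-NULL now
 *	...fill in *pte, then pmap_insert_entry(pmap, va, mpte, m)...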
1542 */ 1543 static vm_page_t 1544 pmap_allocpte(pmap_t pmap, vm_offset_t va) 1545 { 1546 vm_pindex_t ptepindex; 1547 pd_entry_t *pd; 1548 vm_page_t m; 1549 1550 ASSERT_LWKT_TOKEN_HELD(vm_object_token(pmap->pm_pteobj)); 1551 1552 /* 1553 * Calculate pagetable page index 1554 */ 1555 ptepindex = pmap_pde_pindex(va); 1556 1557 /* 1558 * Get the page directory entry 1559 */ 1560 pd = pmap_pde(pmap, va); 1561 1562 /* 1563 * This supports switching from a 2MB page to a 1564 * normal 4K page. 1565 */ 1566 if (pd != NULL && (*pd & (VPTE_PS | VPTE_V)) == (VPTE_PS | VPTE_V)) { 1567 panic("no promotion/demotion yet"); 1568 *pd = 0; 1569 pd = NULL; 1570 /*cpu_invltlb();*/ 1571 /*smp_invltlb();*/ 1572 } 1573 1574 /* 1575 * If the page table page is mapped, we just increment the 1576 * hold count, and activate it. 1577 */ 1578 if (pd != NULL && (*pd & VPTE_V) != 0) { 1579 /* YYY hint is used here on i386 */ 1580 m = pmap_page_lookup(pmap->pm_pteobj, ptepindex); 1581 pmap->pm_ptphint = m; 1582 vm_page_hold(m); 1583 vm_page_wakeup(m); 1584 return m; 1585 } 1586 /* 1587 * Here if the pte page isn't mapped, or if it has been deallocated. 1588 */ 1589 return _pmap_allocpte(pmap, ptepindex); 1590 } 1591 1592 1593 /*************************************************** 1594 * Pmap allocation/deallocation routines. 1595 ***************************************************/ 1596 1597 /* 1598 * Release any resources held by the given physical map. 1599 * Called when a pmap initialized by pmap_pinit is being released. 1600 * Should only be called if the map contains no valid mappings. 1601 * 1602 * Caller must hold pmap->pm_token 1603 */ 1604 static int pmap_release_callback(struct vm_page *p, void *data); 1605 1606 void 1607 pmap_release(struct pmap *pmap) 1608 { 1609 vm_object_t object = pmap->pm_pteobj; 1610 struct rb_vm_page_scan_info info; 1611 1612 KKASSERT(pmap != &kernel_pmap); 1613 1614 lwkt_gettoken(&vm_token); 1615 #if defined(DIAGNOSTIC) 1616 if (object->ref_count != 1) 1617 panic("pmap_release: pteobj reference count != 1"); 1618 #endif 1619 1620 info.pmap = pmap; 1621 info.object = object; 1622 1623 KASSERT(CPUMASK_TESTZERO(pmap->pm_active), 1624 ("pmap %p still active! %016jx", 1625 pmap, 1626 (uintmax_t)CPUMASK_LOWMASK(pmap->pm_active))); 1627 1628 spin_lock(&pmap_spin); 1629 TAILQ_REMOVE(&pmap_list, pmap, pm_pmnode); 1630 spin_unlock(&pmap_spin); 1631 1632 vm_object_hold(object); 1633 do { 1634 info.error = 0; 1635 info.mpte = NULL; 1636 info.limit = object->generation; 1637 1638 vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL, 1639 pmap_release_callback, &info); 1640 if (info.error == 0 && info.mpte) { 1641 if (!pmap_release_free_page(pmap, info.mpte)) 1642 info.error = 1; 1643 } 1644 } while (info.error); 1645 vm_object_drop(object); 1646 lwkt_reltoken(&vm_token); 1647 } 1648 1649 static int 1650 pmap_release_callback(struct vm_page *p, void *data) 1651 { 1652 struct rb_vm_page_scan_info *info = data; 1653 1654 if (p->pindex == NUPDE + NUPDPE + PML4PML4I) { 1655 info->mpte = p; 1656 return(0); 1657 } 1658 if (!pmap_release_free_page(info->pmap, p)) { 1659 info->error = 1; 1660 return(-1); 1661 } 1662 if (info->object->generation != info->limit) { 1663 info->error = 1; 1664 return(-1); 1665 } 1666 return(0); 1667 } 1668 1669 /* 1670 * Grow the number of kernel page table entries, if needed. 1671 * 1672 * No requirements. 
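 *
 * (Illustrative note, not from the original source: kernel_vm_end is
 *  advanced below in PAGE_SIZE * NPTEPG steps, allocating and zeroing
 *  one page table page per step; when pmap_pde() returns NULL a new
 *  page directory page is installed at the PDP level first and the
 *  loop retries.)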
1673 */ 1674 void 1675 pmap_growkernel(vm_offset_t kstart, vm_offset_t kend) 1676 { 1677 vm_offset_t addr; 1678 vm_paddr_t paddr; 1679 vm_offset_t ptppaddr; 1680 vm_page_t nkpg; 1681 pd_entry_t *pde, newpdir; 1682 pdp_entry_t newpdp; 1683 1684 addr = kend; 1685 1686 vm_object_hold(kptobj); 1687 if (kernel_vm_end == 0) { 1688 kernel_vm_end = KvaStart; 1689 nkpt = 0; 1690 while ((*pmap_pde(&kernel_pmap, kernel_vm_end) & VPTE_V) != 0) { 1691 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1692 nkpt++; 1693 if (kernel_vm_end - 1 >= kernel_map.max_offset) { 1694 kernel_vm_end = kernel_map.max_offset; 1695 break; 1696 } 1697 } 1698 } 1699 addr = roundup2(addr, PAGE_SIZE * NPTEPG); 1700 if (addr - 1 >= kernel_map.max_offset) 1701 addr = kernel_map.max_offset; 1702 while (kernel_vm_end < addr) { 1703 pde = pmap_pde(&kernel_pmap, kernel_vm_end); 1704 if (pde == NULL) { 1705 /* We need a new PDP entry */ 1706 nkpg = vm_page_alloc(kptobj, nkpt, 1707 VM_ALLOC_NORMAL | VM_ALLOC_SYSTEM 1708 | VM_ALLOC_INTERRUPT); 1709 if (nkpg == NULL) { 1710 panic("pmap_growkernel: no memory to " 1711 "grow kernel"); 1712 } 1713 paddr = VM_PAGE_TO_PHYS(nkpg); 1714 pmap_zero_page(paddr); 1715 newpdp = (pdp_entry_t)(paddr | 1716 VPTE_V | VPTE_RW | VPTE_U | 1717 VPTE_A | VPTE_M); 1718 *pmap_pdpe(&kernel_pmap, kernel_vm_end) = newpdp; 1719 nkpt++; 1720 continue; /* try again */ 1721 } 1722 if ((*pde & VPTE_V) != 0) { 1723 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & 1724 ~(PAGE_SIZE * NPTEPG - 1); 1725 if (kernel_vm_end - 1 >= kernel_map.max_offset) { 1726 kernel_vm_end = kernel_map.max_offset; 1727 break; 1728 } 1729 continue; 1730 } 1731 1732 /* 1733 * This index is bogus, but out of the way 1734 */ 1735 nkpg = vm_page_alloc(kptobj, nkpt, 1736 VM_ALLOC_NORMAL | 1737 VM_ALLOC_SYSTEM | 1738 VM_ALLOC_INTERRUPT); 1739 if (nkpg == NULL) 1740 panic("pmap_growkernel: no memory to grow kernel"); 1741 1742 vm_page_wire(nkpg); 1743 ptppaddr = VM_PAGE_TO_PHYS(nkpg); 1744 pmap_zero_page(ptppaddr); 1745 newpdir = (pd_entry_t)(ptppaddr | 1746 VPTE_V | VPTE_RW | VPTE_U | 1747 VPTE_A | VPTE_M); 1748 *pmap_pde(&kernel_pmap, kernel_vm_end) = newpdir; 1749 nkpt++; 1750 1751 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & 1752 ~(PAGE_SIZE * NPTEPG - 1); 1753 if (kernel_vm_end - 1 >= kernel_map.max_offset) { 1754 kernel_vm_end = kernel_map.max_offset; 1755 break; 1756 } 1757 } 1758 vm_object_drop(kptobj); 1759 } 1760 1761 /* 1762 * Add a reference to the specified pmap. 1763 * 1764 * No requirements. 1765 */ 1766 void 1767 pmap_reference(pmap_t pmap) 1768 { 1769 if (pmap) { 1770 lwkt_gettoken(&vm_token); 1771 ++pmap->pm_count; 1772 lwkt_reltoken(&vm_token); 1773 } 1774 } 1775 1776 /************************************************************************ 1777 * VMSPACE MANAGEMENT * 1778 ************************************************************************ 1779 * 1780 * The VMSPACE management we do in our virtual kernel must be reflected 1781 * in the real kernel. This is accomplished by making vmspace system 1782 * calls to the real kernel. 
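 *
 * (Illustrative summary, not from the original source: when the VMM is
 *  not enabled, cpu_vmspace_alloc() below relies on vmspace_create(),
 *  vmspace_mmap() with MAP_VPAGETABLE, and vmspace_mcontrol() with
 *  MADV_SETMAP to hand the real kernel our top-level page table, and
 *  cpu_vmspace_free() tears the whole thing down with
 *  vmspace_destroy().)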
1783 */ 1784 void 1785 cpu_vmspace_alloc(struct vmspace *vm) 1786 { 1787 int r; 1788 void *rp; 1789 vpte_t vpte; 1790 1791 /* 1792 * If VMM enable, don't do nothing, we 1793 * are able to use real page tables 1794 */ 1795 if (vmm_enabled) 1796 return; 1797 1798 #define USER_SIZE (VM_MAX_USER_ADDRESS - VM_MIN_USER_ADDRESS) 1799 1800 if (vmspace_create(&vm->vm_pmap, 0, NULL) < 0) 1801 panic("vmspace_create() failed"); 1802 1803 rp = vmspace_mmap(&vm->vm_pmap, VM_MIN_USER_ADDRESS, USER_SIZE, 1804 PROT_READ|PROT_WRITE, 1805 MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED, 1806 MemImageFd, 0); 1807 if (rp == MAP_FAILED) 1808 panic("vmspace_mmap: failed"); 1809 vmspace_mcontrol(&vm->vm_pmap, VM_MIN_USER_ADDRESS, USER_SIZE, 1810 MADV_NOSYNC, 0); 1811 vpte = VM_PAGE_TO_PHYS(vmspace_pmap(vm)->pm_pdirm) | VPTE_RW | VPTE_V | VPTE_U; 1812 r = vmspace_mcontrol(&vm->vm_pmap, VM_MIN_USER_ADDRESS, USER_SIZE, 1813 MADV_SETMAP, vpte); 1814 if (r < 0) 1815 panic("vmspace_mcontrol: failed"); 1816 } 1817 1818 void 1819 cpu_vmspace_free(struct vmspace *vm) 1820 { 1821 /* 1822 * If VMM enable, don't do nothing, we 1823 * are able to use real page tables 1824 */ 1825 if (vmm_enabled) 1826 return; 1827 1828 if (vmspace_destroy(&vm->vm_pmap) < 0) 1829 panic("vmspace_destroy() failed"); 1830 } 1831 1832 /*************************************************** 1833 * page management routines. 1834 ***************************************************/ 1835 1836 /* 1837 * free the pv_entry back to the free list. This function may be 1838 * called from an interrupt. 1839 */ 1840 static __inline void 1841 free_pv_entry(pv_entry_t pv) 1842 { 1843 pv_entry_count--; 1844 KKASSERT(pv_entry_count >= 0); 1845 zfree(pvzone, pv); 1846 } 1847 1848 /* 1849 * get a new pv_entry, allocating a block from the system 1850 * when needed. This function may be called from an interrupt. 1851 */ 1852 static pv_entry_t 1853 get_pv_entry(void) 1854 { 1855 pv_entry_count++; 1856 if (pv_entry_high_water && 1857 (pv_entry_count > pv_entry_high_water) && 1858 (pmap_pagedaemon_waken == 0)) { 1859 pmap_pagedaemon_waken = 1; 1860 wakeup(&vm_pages_needed); 1861 } 1862 return zalloc(pvzone); 1863 } 1864 1865 /* 1866 * This routine is very drastic, but can save the system 1867 * in a pinch. 1868 * 1869 * No requirements. 1870 */ 1871 void 1872 pmap_collect(void) 1873 { 1874 int i; 1875 vm_page_t m; 1876 static int warningdone=0; 1877 1878 if (pmap_pagedaemon_waken == 0) 1879 return; 1880 lwkt_gettoken(&vm_token); 1881 pmap_pagedaemon_waken = 0; 1882 1883 if (warningdone < 5) { 1884 kprintf("pmap_collect: collecting pv entries -- " 1885 "suggest increasing PMAP_SHPGPERPROC\n"); 1886 warningdone++; 1887 } 1888 1889 for (i = 0; i < vm_page_array_size; i++) { 1890 m = &vm_page_array[i]; 1891 if (m->wire_count || m->hold_count) 1892 continue; 1893 if (vm_page_busy_try(m, TRUE) == 0) { 1894 if (m->wire_count == 0 && m->hold_count == 0) { 1895 pmap_remove_all(m); 1896 } 1897 vm_page_wakeup(m); 1898 } 1899 } 1900 lwkt_reltoken(&vm_token); 1901 } 1902 1903 1904 /* 1905 * If it is the first entry on the list, it is actually 1906 * in the header and we must copy the following entry up 1907 * to the header. Otherwise we must search the list for 1908 * the entry. In either case we free the now unused entry. 1909 * 1910 * caller must hold vm_token. 
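 *
 * (Illustrative summary, not from the original source: the routine
 *  below locates the pv entry for (pmap, va), searching the page's pv
 *  list when it is the shorter of the two lists, unlinks the entry from
 *  both the page and the pmap, drops the page table page reference via
 *  pmap_unuse_pt(), and finally frees the pv entry.)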
1911 */ 1912 static int 1913 pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va) 1914 { 1915 pv_entry_t pv; 1916 int rtval; 1917 1918 if (m->md.pv_list_count < pmap->pm_stats.resident_count) { 1919 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 1920 if (pmap == pv->pv_pmap && va == pv->pv_va) 1921 break; 1922 } 1923 } else { 1924 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) { 1925 if (va == pv->pv_va) 1926 break; 1927 } 1928 } 1929 1930 /* 1931 * Note that pv_ptem is NULL if the page table page itself is not 1932 * managed, even if the page being removed IS managed. 1933 */ 1934 rtval = 0; 1935 /* JGXXX When can 'pv' be NULL? */ 1936 if (pv) { 1937 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1938 m->md.pv_list_count--; 1939 atomic_add_int(&m->object->agg_pv_list_count, -1); 1940 KKASSERT(m->md.pv_list_count >= 0); 1941 if (TAILQ_EMPTY(&m->md.pv_list)) 1942 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 1943 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 1944 ++pmap->pm_generation; 1945 KKASSERT(pmap->pm_pteobj != NULL); 1946 vm_object_hold(pmap->pm_pteobj); 1947 rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); 1948 vm_object_drop(pmap->pm_pteobj); 1949 free_pv_entry(pv); 1950 } 1951 return rtval; 1952 } 1953 1954 /* 1955 * Create a pv entry for page at pa for (pmap, va). If the page table page 1956 * holding the VA is managed, mpte will be non-NULL. 1957 */ 1958 static void 1959 pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m) 1960 { 1961 pv_entry_t pv; 1962 1963 crit_enter(); 1964 pv = get_pv_entry(); 1965 pv->pv_va = va; 1966 pv->pv_pmap = pmap; 1967 pv->pv_ptem = mpte; 1968 1969 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); 1970 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 1971 m->md.pv_list_count++; 1972 atomic_add_int(&m->object->agg_pv_list_count, 1); 1973 1974 crit_exit(); 1975 } 1976 1977 /* 1978 * pmap_remove_pte: do the things to unmap a page in a process 1979 */ 1980 static int 1981 pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va) 1982 { 1983 pt_entry_t oldpte; 1984 vm_page_t m; 1985 1986 oldpte = pmap_inval_loadandclear(ptq, pmap, va); 1987 if (oldpte & VPTE_WIRED) 1988 --pmap->pm_stats.wired_count; 1989 KKASSERT(pmap->pm_stats.wired_count >= 0); 1990 1991 #if 0 1992 /* 1993 * Machines that don't support invlpg, also don't support 1994 * PG_G. XXX PG_G is disabled for SMP so don't worry about 1995 * the SMP case. 1996 */ 1997 if (oldpte & PG_G) 1998 cpu_invlpg((void *)va); 1999 #endif 2000 KKASSERT(pmap->pm_stats.resident_count > 0); 2001 --pmap->pm_stats.resident_count; 2002 if (oldpte & VPTE_MANAGED) { 2003 m = PHYS_TO_VM_PAGE(oldpte); 2004 if (oldpte & VPTE_M) { 2005 #if defined(PMAP_DIAGNOSTIC) 2006 if (pmap_nw_modified(oldpte)) { 2007 kprintf("pmap_remove: modified page not " 2008 "writable: va: 0x%lx, pte: 0x%lx\n", 2009 va, oldpte); 2010 } 2011 #endif 2012 if (pmap_track_modified(pmap, va)) 2013 vm_page_dirty(m); 2014 } 2015 if (oldpte & VPTE_A) 2016 vm_page_flag_set(m, PG_REFERENCED); 2017 return pmap_remove_entry(pmap, m, va); 2018 } else { 2019 return pmap_unuse_pt(pmap, va, NULL); 2020 } 2021 2022 return 0; 2023 } 2024 2025 /* 2026 * pmap_remove_page: 2027 * 2028 * Remove a single page from a process address space. 2029 * 2030 * This function may not be called from an interrupt if the pmap is 2031 * not kernel_pmap. 
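 *
 * (Illustrative note, not from the original source: callers are
 *  expected to have ruled out 2MB VPTE_PS mappings first, as
 *  pmap_remove() does before taking this single-page shortcut.)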
2032 */ 2033 static void 2034 pmap_remove_page(struct pmap *pmap, vm_offset_t va) 2035 { 2036 pt_entry_t *pte; 2037 2038 pte = pmap_pte(pmap, va); 2039 if (pte == NULL) 2040 return; 2041 if ((*pte & VPTE_V) == 0) 2042 return; 2043 pmap_remove_pte(pmap, pte, va); 2044 } 2045 2046 /* 2047 * Remove the given range of addresses from the specified map. 2048 * 2049 * It is assumed that the start and end are properly rounded to 2050 * the page size. 2051 * 2052 * This function may not be called from an interrupt if the pmap is 2053 * not kernel_pmap. 2054 * 2055 * No requirements. 2056 */ 2057 void 2058 pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) 2059 { 2060 vm_offset_t va_next; 2061 pml4_entry_t *pml4e; 2062 pdp_entry_t *pdpe; 2063 pd_entry_t ptpaddr, *pde; 2064 pt_entry_t *pte; 2065 2066 if (pmap == NULL) 2067 return; 2068 2069 vm_object_hold(pmap->pm_pteobj); 2070 lwkt_gettoken(&vm_token); 2071 KKASSERT(pmap->pm_stats.resident_count >= 0); 2072 if (pmap->pm_stats.resident_count == 0) { 2073 lwkt_reltoken(&vm_token); 2074 vm_object_drop(pmap->pm_pteobj); 2075 return; 2076 } 2077 2078 /* 2079 * special handling of removing one page. a very 2080 * common operation and easy to short circuit some 2081 * code. 2082 */ 2083 if (sva + PAGE_SIZE == eva) { 2084 pde = pmap_pde(pmap, sva); 2085 if (pde && (*pde & VPTE_PS) == 0) { 2086 pmap_remove_page(pmap, sva); 2087 lwkt_reltoken(&vm_token); 2088 vm_object_drop(pmap->pm_pteobj); 2089 return; 2090 } 2091 } 2092 2093 for (; sva < eva; sva = va_next) { 2094 pml4e = pmap_pml4e(pmap, sva); 2095 if ((*pml4e & VPTE_V) == 0) { 2096 va_next = (sva + NBPML4) & ~PML4MASK; 2097 if (va_next < sva) 2098 va_next = eva; 2099 continue; 2100 } 2101 2102 pdpe = pmap_pml4e_to_pdpe(pml4e, sva); 2103 if ((*pdpe & VPTE_V) == 0) { 2104 va_next = (sva + NBPDP) & ~PDPMASK; 2105 if (va_next < sva) 2106 va_next = eva; 2107 continue; 2108 } 2109 2110 /* 2111 * Calculate index for next page table. 2112 */ 2113 va_next = (sva + NBPDR) & ~PDRMASK; 2114 if (va_next < sva) 2115 va_next = eva; 2116 2117 pde = pmap_pdpe_to_pde(pdpe, sva); 2118 ptpaddr = *pde; 2119 2120 /* 2121 * Weed out invalid mappings. 2122 */ 2123 if (ptpaddr == 0) 2124 continue; 2125 2126 /* 2127 * Check for large page. 2128 */ 2129 if ((ptpaddr & VPTE_PS) != 0) { 2130 /* JG FreeBSD has more complex treatment here */ 2131 KKASSERT(*pde != 0); 2132 pmap_inval_pde(pde, pmap, sva); 2133 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 2134 continue; 2135 } 2136 2137 /* 2138 * Limit our scan to either the end of the va represented 2139 * by the current page table page, or to the end of the 2140 * range being removed. 2141 */ 2142 if (va_next > eva) 2143 va_next = eva; 2144 2145 /* 2146 * NOTE: pmap_remove_pte() can block. 2147 */ 2148 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, 2149 sva += PAGE_SIZE) { 2150 if (*pte == 0) 2151 continue; 2152 if (pmap_remove_pte(pmap, pte, sva)) 2153 break; 2154 } 2155 } 2156 lwkt_reltoken(&vm_token); 2157 vm_object_drop(pmap->pm_pteobj); 2158 } 2159 2160 /* 2161 * Removes this physical page from all physical maps in which it resides. 2162 * Reflects back modify bits to the pager. 2163 * 2164 * This routine may not be called from an interrupt. 2165 * 2166 * No requirements. 2167 */ 2168 static void 2169 pmap_remove_all(vm_page_t m) 2170 { 2171 pt_entry_t *pte, tpte; 2172 pv_entry_t pv; 2173 2174 #if defined(PMAP_DIAGNOSTIC) 2175 /* 2176 * XXX this makes pmap_page_protect(NONE) illegal for non-managed 2177 * pages! 
2178 */ 2179 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { 2180 panic("pmap_page_protect: illegal for unmanaged page, va: 0x%08llx", (long long)VM_PAGE_TO_PHYS(m)); 2181 } 2182 #endif 2183 2184 lwkt_gettoken(&vm_token); 2185 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 2186 KKASSERT(pv->pv_pmap->pm_stats.resident_count > 0); 2187 --pv->pv_pmap->pm_stats.resident_count; 2188 2189 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2190 KKASSERT(pte != NULL); 2191 2192 tpte = pmap_inval_loadandclear(pte, pv->pv_pmap, pv->pv_va); 2193 if (tpte & VPTE_WIRED) 2194 pv->pv_pmap->pm_stats.wired_count--; 2195 KKASSERT(pv->pv_pmap->pm_stats.wired_count >= 0); 2196 2197 if (tpte & VPTE_A) 2198 vm_page_flag_set(m, PG_REFERENCED); 2199 2200 /* 2201 * Update the vm_page_t clean and reference bits. 2202 */ 2203 if (tpte & VPTE_M) { 2204 #if defined(PMAP_DIAGNOSTIC) 2205 if (pmap_nw_modified(tpte)) { 2206 kprintf( 2207 "pmap_remove_all: modified page not writable: va: 0x%lx, pte: 0x%lx\n", 2208 pv->pv_va, tpte); 2209 } 2210 #endif 2211 if (pmap_track_modified(pv->pv_pmap, pv->pv_va)) 2212 vm_page_dirty(m); 2213 } 2214 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2215 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); 2216 ++pv->pv_pmap->pm_generation; 2217 m->md.pv_list_count--; 2218 atomic_add_int(&m->object->agg_pv_list_count, -1); 2219 KKASSERT(m->md.pv_list_count >= 0); 2220 if (TAILQ_EMPTY(&m->md.pv_list)) 2221 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 2222 vm_object_hold(pv->pv_pmap->pm_pteobj); 2223 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); 2224 vm_object_drop(pv->pv_pmap->pm_pteobj); 2225 free_pv_entry(pv); 2226 } 2227 KKASSERT((m->flags & (PG_MAPPED|PG_WRITEABLE)) == 0); 2228 lwkt_reltoken(&vm_token); 2229 } 2230 2231 /* 2232 * Set the physical protection on the specified range of this map 2233 * as requested. 2234 * 2235 * This function may not be called from an interrupt if the map is 2236 * not the kernel_pmap. 2237 * 2238 * No requirements. 2239 */ 2240 void 2241 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 2242 { 2243 vm_offset_t va_next; 2244 pml4_entry_t *pml4e; 2245 pdp_entry_t *pdpe; 2246 pd_entry_t ptpaddr, *pde; 2247 pt_entry_t *pte; 2248 2249 /* JG review for NX */ 2250 2251 if (pmap == NULL) 2252 return; 2253 2254 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 2255 pmap_remove(pmap, sva, eva); 2256 return; 2257 } 2258 2259 if (prot & VM_PROT_WRITE) 2260 return; 2261 2262 lwkt_gettoken(&vm_token); 2263 2264 for (; sva < eva; sva = va_next) { 2265 2266 pml4e = pmap_pml4e(pmap, sva); 2267 if ((*pml4e & VPTE_V) == 0) { 2268 va_next = (sva + NBPML4) & ~PML4MASK; 2269 if (va_next < sva) 2270 va_next = eva; 2271 continue; 2272 } 2273 2274 pdpe = pmap_pml4e_to_pdpe(pml4e, sva); 2275 if ((*pdpe & VPTE_V) == 0) { 2276 va_next = (sva + NBPDP) & ~PDPMASK; 2277 if (va_next < sva) 2278 va_next = eva; 2279 continue; 2280 } 2281 2282 va_next = (sva + NBPDR) & ~PDRMASK; 2283 if (va_next < sva) 2284 va_next = eva; 2285 2286 pde = pmap_pdpe_to_pde(pdpe, sva); 2287 ptpaddr = *pde; 2288 2289 /* 2290 * Check for large page. 2291 */ 2292 if ((ptpaddr & VPTE_PS) != 0) { 2293 /* JG correct? */ 2294 pmap_clean_pde(pde, pmap, sva); 2295 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 2296 continue; 2297 } 2298 2299 /* 2300 * Weed out invalid mappings. Note: we assume that the page 2301 * directory table is always allocated, and in kernel virtual. 
2302 */
2303 if (ptpaddr == 0)
2304 continue;
2305
2306 if (va_next > eva)
2307 va_next = eva;
2308
2309 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
2310 sva += PAGE_SIZE) {
2311 pt_entry_t pbits;
2312 vm_page_t m;
2313
2314 /*
2315 * Clean managed pages and also check the accessed
2316 * bit. Just remove write perms for unmanaged
2317 * pages. Be careful of races; turning off write
2318 * access will force a fault rather than setting
2319 * the modified bit at an unexpected time.
2320 */
2321 if (*pte & VPTE_MANAGED) {
2322 pbits = pmap_clean_pte(pte, pmap, sva);
2323 m = NULL;
2324 if (pbits & VPTE_A) {
2325 m = PHYS_TO_VM_PAGE(pbits & VPTE_FRAME);
2326 vm_page_flag_set(m, PG_REFERENCED);
2327 atomic_clear_long(pte, VPTE_A);
2328 }
2329 if (pbits & VPTE_M) {
2330 if (pmap_track_modified(pmap, sva)) {
2331 if (m == NULL)
2332 m = PHYS_TO_VM_PAGE(pbits & VPTE_FRAME);
2333 vm_page_dirty(m);
2334 }
2335 }
2336 } else {
2337 pbits = pmap_setro_pte(pte, pmap, sva);
2338 }
2339 }
2340 }
2341 lwkt_reltoken(&vm_token);
2342 }
2343
2344 /*
2345 * Enter a managed page into a pmap. If the page is not wired, related pmap
2346 * data can be destroyed at any time and re-established later on demand.
2347 *
2348 * Insert the vm_page (m) at virtual address (v) in (pmap), with the
2349 * specified protection, and wire the mapping if requested.
2350 *
2351 * NOTE: This routine may not lazy-evaluate or lose information. The
2352 * page must actually be inserted into the given map NOW.
2353 *
2354 * NOTE: When entering a page at a KVA address, the pmap must be the
2355 * kernel_pmap.
2356 *
2357 * No requirements.
2358 */
2359 void
2360 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
2361 boolean_t wired, vm_map_entry_t entry __unused)
2362 {
2363 vm_paddr_t pa;
2364 pd_entry_t *pde;
2365 pt_entry_t *pte;
2366 vm_paddr_t opa;
2367 pt_entry_t origpte, newpte;
2368 vm_page_t mpte;
2369
2370 if (pmap == NULL)
2371 return;
2372
2373 va = trunc_page(va);
2374
2375 vm_object_hold(pmap->pm_pteobj);
2376 lwkt_gettoken(&vm_token);
2377
2378 /*
2379 * Get the page table page. The kernel_pmap's page table pages
2380 * are preallocated and have no associated vm_page_t.
2381 */
2382 if (pmap == &kernel_pmap)
2383 mpte = NULL;
2384 else
2385 mpte = pmap_allocpte(pmap, va);
2386
2387 pde = pmap_pde(pmap, va);
2388 if (pde != NULL && (*pde & VPTE_V) != 0) {
2389 if ((*pde & VPTE_PS) != 0)
2390 panic("pmap_enter: attempted pmap_enter on 2MB page");
2391 pte = pmap_pde_to_pte(pde, va);
2392 } else {
2393 panic("pmap_enter: invalid page directory va=%#lx", va);
2394 }
2395
2396 KKASSERT(pte != NULL);
2397 /*
2398 * Deal with races on the original mapping (though don't worry
2399 * about VPTE_A races) by cleaning it. This will force a fault
2400 * if an attempt is made to write to the page.
2401 */
2402 pa = VM_PAGE_TO_PHYS(m);
2403 origpte = pmap_clean_pte(pte, pmap, va);
2404 opa = origpte & VPTE_FRAME;
2405
2406 if (origpte & VPTE_PS)
2407 panic("pmap_enter: attempted pmap_enter on 2MB page");
2408
2409 /*
2410 * Mapping has not changed, must be protection or wiring change.
2411 */
2412 if (origpte && (opa == pa)) {
2413 /*
2414 * Wiring change, just update stats. We don't worry about
2415 * wiring PT pages as they remain resident as long as there
2416 * are valid mappings in them. Hence, if a user page is wired,
2417 * the PT page will be also.
2418 */ 2419 if (wired && ((origpte & VPTE_WIRED) == 0)) 2420 ++pmap->pm_stats.wired_count; 2421 else if (!wired && (origpte & VPTE_WIRED)) 2422 --pmap->pm_stats.wired_count; 2423 2424 /* 2425 * Remove the extra pte reference. Note that we cannot 2426 * optimize the RO->RW case because we have adjusted the 2427 * wiring count above and may need to adjust the wiring 2428 * bits below. 2429 */ 2430 if (mpte) 2431 mpte->hold_count--; 2432 2433 /* 2434 * We might be turning off write access to the page, 2435 * so we go ahead and sense modify status. 2436 */ 2437 if (origpte & VPTE_MANAGED) { 2438 if ((origpte & VPTE_M) && 2439 pmap_track_modified(pmap, va)) { 2440 vm_page_t om; 2441 om = PHYS_TO_VM_PAGE(opa); 2442 vm_page_dirty(om); 2443 } 2444 pa |= VPTE_MANAGED; 2445 KKASSERT(m->flags & PG_MAPPED); 2446 } 2447 goto validate; 2448 } 2449 /* 2450 * Mapping has changed, invalidate old range and fall through to 2451 * handle validating new mapping. 2452 */ 2453 if (opa) { 2454 int err; 2455 err = pmap_remove_pte(pmap, pte, va); 2456 if (err) 2457 panic("pmap_enter: pte vanished, va: 0x%lx", va); 2458 } 2459 2460 /* 2461 * Enter on the PV list if part of our managed memory. Note that we 2462 * raise IPL while manipulating pv_table since pmap_enter can be 2463 * called at interrupt time. 2464 */ 2465 if (pmap_initialized && 2466 (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { 2467 pmap_insert_entry(pmap, va, mpte, m); 2468 pa |= VPTE_MANAGED; 2469 vm_page_flag_set(m, PG_MAPPED); 2470 } 2471 2472 /* 2473 * Increment counters 2474 */ 2475 ++pmap->pm_stats.resident_count; 2476 if (wired) 2477 pmap->pm_stats.wired_count++; 2478 2479 validate: 2480 /* 2481 * Now validate mapping with desired protection/wiring. 2482 */ 2483 newpte = (pt_entry_t) (pa | pte_prot(pmap, prot) | VPTE_V | VPTE_U); 2484 2485 if (wired) 2486 newpte |= VPTE_WIRED; 2487 // if (pmap != &kernel_pmap) 2488 newpte |= VPTE_U; 2489 2490 /* 2491 * If the mapping or permission bits are different from the 2492 * (now cleaned) original pte, an update is needed. We've 2493 * already downgraded or invalidated the page so all we have 2494 * to do now is update the bits. 2495 * 2496 * XXX should we synchronize RO->RW changes to avoid another 2497 * fault? 2498 */ 2499 if ((origpte & ~(VPTE_RW|VPTE_M|VPTE_A)) != newpte) { 2500 *pte = newpte | VPTE_A; 2501 if (newpte & VPTE_RW) 2502 vm_page_flag_set(m, PG_WRITEABLE); 2503 } 2504 KKASSERT((newpte & VPTE_MANAGED) == 0 || (m->flags & PG_MAPPED)); 2505 lwkt_reltoken(&vm_token); 2506 vm_object_drop(pmap->pm_pteobj); 2507 } 2508 2509 /* 2510 * This code works like pmap_enter() but assumes VM_PROT_READ and not-wired. 2511 * 2512 * Currently this routine may only be used on user pmaps, not kernel_pmap. 2513 * 2514 * No requirements. 2515 */ 2516 void 2517 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m) 2518 { 2519 pt_entry_t *pte; 2520 vm_paddr_t pa; 2521 vm_page_t mpte; 2522 vm_pindex_t ptepindex; 2523 pd_entry_t *ptepa; 2524 2525 KKASSERT(pmap != &kernel_pmap); 2526 2527 KKASSERT(va >= VM_MIN_USER_ADDRESS && va < VM_MAX_USER_ADDRESS); 2528 2529 /* 2530 * Calculate pagetable page index 2531 */ 2532 ptepindex = pmap_pde_pindex(va); 2533 2534 vm_object_hold(pmap->pm_pteobj); 2535 lwkt_gettoken(&vm_token); 2536 2537 do { 2538 /* 2539 * Get the page directory entry 2540 */ 2541 ptepa = pmap_pde(pmap, va); 2542 2543 /* 2544 * If the page table page is mapped, we just increment 2545 * the hold count, and activate it. 
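 *
 * pm_ptphint caches the most recently used page table page so that
 * back-to-back faults in the same 2MB region can skip the pteobj
 * lookup.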
2546 */ 2547 if (ptepa && (*ptepa & VPTE_V) != 0) { 2548 if (*ptepa & VPTE_PS) 2549 panic("pmap_enter_quick: unexpected mapping into 2MB page"); 2550 if (pmap->pm_ptphint && 2551 (pmap->pm_ptphint->pindex == ptepindex)) { 2552 mpte = pmap->pm_ptphint; 2553 } else { 2554 mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex); 2555 pmap->pm_ptphint = mpte; 2556 vm_page_wakeup(mpte); 2557 } 2558 if (mpte) 2559 mpte->hold_count++; 2560 } else { 2561 mpte = _pmap_allocpte(pmap, ptepindex); 2562 } 2563 } while (mpte == NULL); 2564 2565 /* 2566 * Ok, now that the page table page has been validated, get the pte. 2567 * If the pte is already mapped undo mpte's hold_count and 2568 * just return. 2569 */ 2570 pte = pmap_pte(pmap, va); 2571 if (*pte & VPTE_V) { 2572 KKASSERT(mpte != NULL); 2573 pmap_unwire_pte_hold(pmap, va, mpte); 2574 pa = VM_PAGE_TO_PHYS(m); 2575 KKASSERT(((*pte ^ pa) & VPTE_FRAME) == 0); 2576 lwkt_reltoken(&vm_token); 2577 vm_object_drop(pmap->pm_pteobj); 2578 return; 2579 } 2580 2581 /* 2582 * Enter on the PV list if part of our managed memory 2583 */ 2584 if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { 2585 pmap_insert_entry(pmap, va, mpte, m); 2586 vm_page_flag_set(m, PG_MAPPED); 2587 } 2588 2589 /* 2590 * Increment counters 2591 */ 2592 ++pmap->pm_stats.resident_count; 2593 2594 pa = VM_PAGE_TO_PHYS(m); 2595 2596 /* 2597 * Now validate mapping with RO protection 2598 */ 2599 if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) 2600 *pte = (vpte_t)pa | VPTE_V | VPTE_U; 2601 else 2602 *pte = (vpte_t)pa | VPTE_V | VPTE_U | VPTE_MANAGED; 2603 /*pmap_inval_add(&info, pmap, va); shouldn't be needed 0->valid */ 2604 /*pmap_inval_flush(&info); don't need for vkernel */ 2605 lwkt_reltoken(&vm_token); 2606 vm_object_drop(pmap->pm_pteobj); 2607 } 2608 2609 /* 2610 * Make a temporary mapping for a physical address. This is only intended 2611 * to be used for panic dumps. 2612 * 2613 * The caller is responsible for calling smp_invltlb(). 2614 */ 2615 void * 2616 pmap_kenter_temporary(vm_paddr_t pa, long i) 2617 { 2618 pmap_kenter_quick(crashdumpmap + (i * PAGE_SIZE), pa); 2619 return ((void *)crashdumpmap); 2620 } 2621 2622 #define MAX_INIT_PT (96) 2623 2624 /* 2625 * This routine preloads the ptes for a given object into the specified pmap. 2626 * This eliminates the blast of soft faults on process startup and 2627 * immediately after an mmap. 2628 * 2629 * No requirements. 2630 */ 2631 static int pmap_object_init_pt_callback(vm_page_t p, void *data); 2632 2633 void 2634 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot, 2635 vm_object_t object, vm_pindex_t pindex, 2636 vm_size_t size, int limit) 2637 { 2638 struct rb_vm_page_scan_info info; 2639 struct lwp *lp; 2640 vm_size_t psize; 2641 2642 /* 2643 * We can't preinit if read access isn't set or there is no pmap 2644 * or object. 
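 *
 * Preloading is also skipped unless the pmap belongs to the current
 * lwp and the object is a vnode object of reasonable size (see the
 * checks below).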
2645 */ 2646 if ((prot & VM_PROT_READ) == 0 || pmap == NULL || object == NULL) 2647 return; 2648 2649 /* 2650 * We can't preinit if the pmap is not the current pmap 2651 */ 2652 lp = curthread->td_lwp; 2653 if (lp == NULL || pmap != vmspace_pmap(lp->lwp_vmspace)) 2654 return; 2655 2656 psize = x86_64_btop(size); 2657 2658 if ((object->type != OBJT_VNODE) || 2659 ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) && 2660 (object->resident_page_count > MAX_INIT_PT))) { 2661 return; 2662 } 2663 2664 if (psize + pindex > object->size) { 2665 if (object->size < pindex) 2666 return; 2667 psize = object->size - pindex; 2668 } 2669 2670 if (psize == 0) 2671 return; 2672 2673 /* 2674 * Use a red-black scan to traverse the requested range and load 2675 * any valid pages found into the pmap. 2676 * 2677 * We cannot safely scan the object's memq unless we are in a 2678 * critical section since interrupts can remove pages from objects. 2679 */ 2680 info.start_pindex = pindex; 2681 info.end_pindex = pindex + psize - 1; 2682 info.limit = limit; 2683 info.mpte = NULL; 2684 info.addr = addr; 2685 info.pmap = pmap; 2686 2687 vm_object_hold_shared(object); 2688 vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp, 2689 pmap_object_init_pt_callback, &info); 2690 vm_object_drop(object); 2691 } 2692 2693 static 2694 int 2695 pmap_object_init_pt_callback(vm_page_t p, void *data) 2696 { 2697 struct rb_vm_page_scan_info *info = data; 2698 vm_pindex_t rel_index; 2699 /* 2700 * don't allow an madvise to blow away our really 2701 * free pages allocating pv entries. 2702 */ 2703 if ((info->limit & MAP_PREFAULT_MADVISE) && 2704 vmstats.v_free_count < vmstats.v_free_reserved) { 2705 return(-1); 2706 } 2707 2708 /* 2709 * Ignore list markers and ignore pages we cannot instantly 2710 * busy (while holding the object token). 2711 */ 2712 if (p->flags & PG_MARKER) 2713 return 0; 2714 if (vm_page_busy_try(p, TRUE)) 2715 return 0; 2716 if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && 2717 (p->flags & PG_FICTITIOUS) == 0) { 2718 if ((p->queue - p->pc) == PQ_CACHE) 2719 vm_page_deactivate(p); 2720 rel_index = p->pindex - info->start_pindex; 2721 pmap_enter_quick(info->pmap, 2722 info->addr + x86_64_ptob(rel_index), p); 2723 } 2724 vm_page_wakeup(p); 2725 return(0); 2726 } 2727 2728 /* 2729 * Return TRUE if the pmap is in shape to trivially 2730 * pre-fault the specified address. 2731 * 2732 * Returns FALSE if it would be non-trivial or if a 2733 * pte is already loaded into the slot. 2734 * 2735 * No requirements. 2736 */ 2737 int 2738 pmap_prefault_ok(pmap_t pmap, vm_offset_t addr) 2739 { 2740 pt_entry_t *pte; 2741 pd_entry_t *pde; 2742 int ret; 2743 2744 lwkt_gettoken(&vm_token); 2745 pde = pmap_pde(pmap, addr); 2746 if (pde == NULL || *pde == 0) { 2747 ret = 0; 2748 } else { 2749 pte = pmap_pde_to_pte(pde, addr); 2750 ret = (*pte) ? 0 : 1; 2751 } 2752 lwkt_reltoken(&vm_token); 2753 return (ret); 2754 } 2755 2756 /* 2757 * Change the wiring attribute for a map/virtual-address pair. 2758 * 2759 * The mapping must already exist in the pmap. 2760 * No other requirements. 
2761 */
2762 void
2763 pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired,
2764 vm_map_entry_t entry __unused)
2765 {
2766 pt_entry_t *pte;
2767
2768 if (pmap == NULL)
2769 return;
2770
2771 lwkt_gettoken(&vm_token);
2772 pte = pmap_pte(pmap, va);
2773
2774 if (wired && !pmap_pte_w(pte))
2775 pmap->pm_stats.wired_count++;
2776 else if (!wired && pmap_pte_w(pte))
2777 pmap->pm_stats.wired_count--;
2778
2779 /*
2780 * Wiring is not a hardware characteristic so there is no need to
2781 * invalidate TLB. However, in an SMP environment we must use
2782 * a locked bus cycle to update the pte (if we are not using
2783 * the pmap_inval_*() API that is)... it's ok to do this for simple
2784 * wiring changes.
2785 */
2786 if (wired)
2787 atomic_set_long(pte, VPTE_WIRED);
2788 else
2789 atomic_clear_long(pte, VPTE_WIRED);
2790 lwkt_reltoken(&vm_token);
2791 }
2792
2793 /*
2794 * Copy the range specified by src_addr/len
2795 * from the source map to the range dst_addr/len
2796 * in the destination map.
2797 *
2798 * This routine is only advisory and need not do anything.
2799 */
2800 void
2801 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
2802 vm_size_t len, vm_offset_t src_addr)
2803 {
2804 /*
2805 * XXX BUGGY. Among other things, srcmpte is assumed to remain
2806 * valid through blocking calls, and that's just not going to
2807 * be the case.
2808 *
2809 * FIXME!
2810 */
2811 return;
2812 }
2813
2814 /*
2815 * pmap_zero_page:
2816 *
2817 * Zero the specified physical page.
2818 *
2819 * This function may be called from an interrupt and no locking is
2820 * required.
2821 */
2822 void
2823 pmap_zero_page(vm_paddr_t phys)
2824 {
2825 vm_offset_t va = PHYS_TO_DMAP(phys);
2826
2827 bzero((void *)va, PAGE_SIZE);
2828 }
2829
2830 /*
2831 * pmap_zero_page_area:
2832 *
2833 * Zero part of a physical page by mapping it into memory and clearing
2834 * its contents with bzero.
2835 *
2836 * off and size may not cover an area beyond a single hardware page.
2837 */
2838 void
2839 pmap_zero_page_area(vm_paddr_t phys, int off, int size)
2840 {
2841 crit_enter();
2842 vm_offset_t virt = PHYS_TO_DMAP(phys);
2843 bzero((char *)virt + off, size);
2844 crit_exit();
2845 }
2846
2847 /*
2848 * pmap_copy_page:
2849 *
2850 * Copy the physical page from the source PA to the target PA.
2851 * This function may be called from an interrupt. No locking
2852 * is required.
2853 */
2854 void
2855 pmap_copy_page(vm_paddr_t src, vm_paddr_t dst)
2856 {
2857 vm_offset_t src_virt, dst_virt;
2858
2859 crit_enter();
2860 src_virt = PHYS_TO_DMAP(src);
2861 dst_virt = PHYS_TO_DMAP(dst);
2862 bcopy((void *)src_virt, (void *)dst_virt, PAGE_SIZE);
2863 crit_exit();
2864 }
2865
2866 /*
2867 * pmap_copy_page_frag:
2868 *
2869 * Copy a portion of the physical page from the source PA to the target PA.
2870 * This function may be called from an interrupt. No locking
2871 * is required.
2872 */
2873 void
2874 pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes)
2875 {
2876 vm_offset_t src_virt, dst_virt;
2877
2878 crit_enter();
2879 src_virt = PHYS_TO_DMAP(src);
2880 dst_virt = PHYS_TO_DMAP(dst);
2881 bcopy((char *)src_virt + (src & PAGE_MASK),
2882 (char *)dst_virt + (dst & PAGE_MASK),
2883 bytes);
2884 crit_exit();
2885 }
2886
2887 /*
2888 * Returns true if the pmap's pv is one of the first 16 pvs linked to
2889 * from this page. This count may be changed upwards or downwards
2890 * in the future; it is only necessary that true be returned for a small
2891 * subset of pmaps for proper page aging.
2892 * 2893 * No other requirements. 2894 */ 2895 boolean_t 2896 pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 2897 { 2898 pv_entry_t pv; 2899 int loops = 0; 2900 2901 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 2902 return FALSE; 2903 2904 crit_enter(); 2905 lwkt_gettoken(&vm_token); 2906 2907 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2908 if (pv->pv_pmap == pmap) { 2909 lwkt_reltoken(&vm_token); 2910 crit_exit(); 2911 return TRUE; 2912 } 2913 loops++; 2914 if (loops >= 16) 2915 break; 2916 } 2917 lwkt_reltoken(&vm_token); 2918 crit_exit(); 2919 return (FALSE); 2920 } 2921 2922 /* 2923 * Remove all pages from specified address space this aids process 2924 * exit speeds. Also, this code is special cased for current 2925 * process only, but can have the more generic (and slightly slower) 2926 * mode enabled. This is much faster than pmap_remove in the case 2927 * of running down an entire address space. 2928 * 2929 * No other requirements. 2930 */ 2931 void 2932 pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 2933 { 2934 pt_entry_t *pte, tpte; 2935 pv_entry_t pv, npv; 2936 vm_page_t m; 2937 int save_generation; 2938 2939 if (pmap->pm_pteobj) 2940 vm_object_hold(pmap->pm_pteobj); 2941 lwkt_gettoken(&vm_token); 2942 2943 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { 2944 if (pv->pv_va >= eva || pv->pv_va < sva) { 2945 npv = TAILQ_NEXT(pv, pv_plist); 2946 continue; 2947 } 2948 2949 KKASSERT(pmap == pv->pv_pmap); 2950 2951 pte = pmap_pte(pmap, pv->pv_va); 2952 2953 /* 2954 * We cannot remove wired pages from a process' mapping 2955 * at this time 2956 */ 2957 if (*pte & VPTE_WIRED) { 2958 npv = TAILQ_NEXT(pv, pv_plist); 2959 continue; 2960 } 2961 tpte = pmap_inval_loadandclear(pte, pmap, pv->pv_va); 2962 2963 m = PHYS_TO_VM_PAGE(tpte & VPTE_FRAME); 2964 2965 KASSERT(m < &vm_page_array[vm_page_array_size], 2966 ("pmap_remove_pages: bad tpte %lx", tpte)); 2967 2968 KKASSERT(pmap->pm_stats.resident_count > 0); 2969 --pmap->pm_stats.resident_count; 2970 2971 /* 2972 * Update the vm_page_t clean and reference bits. 2973 */ 2974 if (tpte & VPTE_M) { 2975 vm_page_dirty(m); 2976 } 2977 2978 npv = TAILQ_NEXT(pv, pv_plist); 2979 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 2980 save_generation = ++pmap->pm_generation; 2981 2982 m->md.pv_list_count--; 2983 atomic_add_int(&m->object->agg_pv_list_count, -1); 2984 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2985 if (TAILQ_EMPTY(&m->md.pv_list)) 2986 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 2987 2988 pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem); 2989 free_pv_entry(pv); 2990 2991 /* 2992 * Restart the scan if we blocked during the unuse or free 2993 * calls and other removals were made. 2994 */ 2995 if (save_generation != pmap->pm_generation) { 2996 kprintf("Warning: pmap_remove_pages race-A avoided\n"); 2997 npv = TAILQ_FIRST(&pmap->pm_pvlist); 2998 } 2999 } 3000 lwkt_reltoken(&vm_token); 3001 if (pmap->pm_pteobj) 3002 vm_object_drop(pmap->pm_pteobj); 3003 } 3004 3005 /* 3006 * pmap_testbit tests bits in active mappings of a VM page. 3007 */ 3008 static boolean_t 3009 pmap_testbit(vm_page_t m, int bit) 3010 { 3011 pv_entry_t pv; 3012 pt_entry_t *pte; 3013 3014 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 3015 return FALSE; 3016 3017 if (TAILQ_FIRST(&m->md.pv_list) == NULL) 3018 return FALSE; 3019 3020 crit_enter(); 3021 3022 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3023 /* 3024 * if the bit being tested is the modified bit, then 3025 * mark clean_map and ptes as never 3026 * modified. 
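 *
 * In practice this means mappings rejected by pmap_track_modified()
 * are skipped when testing VPTE_A or VPTE_M.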
3027 */ 3028 if (bit & (VPTE_A|VPTE_M)) { 3029 if (!pmap_track_modified(pv->pv_pmap, pv->pv_va)) 3030 continue; 3031 } 3032 3033 #if defined(PMAP_DIAGNOSTIC) 3034 if (pv->pv_pmap == NULL) { 3035 kprintf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va); 3036 continue; 3037 } 3038 #endif 3039 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 3040 if (*pte & bit) { 3041 crit_exit(); 3042 return TRUE; 3043 } 3044 } 3045 crit_exit(); 3046 return (FALSE); 3047 } 3048 3049 /* 3050 * This routine is used to clear bits in ptes. Certain bits require special 3051 * handling, in particular (on virtual kernels) the VPTE_M (modify) bit. 3052 * 3053 * This routine is only called with certain VPTE_* bit combinations. 3054 */ 3055 static __inline void 3056 pmap_clearbit(vm_page_t m, int bit) 3057 { 3058 pv_entry_t pv; 3059 pt_entry_t *pte; 3060 pt_entry_t pbits; 3061 3062 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 3063 return; 3064 3065 crit_enter(); 3066 3067 /* 3068 * Loop over all current mappings setting/clearing as appropos If 3069 * setting RO do we need to clear the VAC? 3070 */ 3071 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3072 /* 3073 * don't write protect pager mappings 3074 */ 3075 if (bit == VPTE_RW) { 3076 if (!pmap_track_modified(pv->pv_pmap, pv->pv_va)) 3077 continue; 3078 } 3079 3080 #if defined(PMAP_DIAGNOSTIC) 3081 if (pv->pv_pmap == NULL) { 3082 kprintf("Null pmap (cb) at va: 0x%lx\n", pv->pv_va); 3083 continue; 3084 } 3085 #endif 3086 3087 /* 3088 * Careful here. We can use a locked bus instruction to 3089 * clear VPTE_A or VPTE_M safely but we need to synchronize 3090 * with the target cpus when we mess with VPTE_RW. 3091 * 3092 * On virtual kernels we must force a new fault-on-write 3093 * in the real kernel if we clear the Modify bit ourselves, 3094 * otherwise the real kernel will not get a new fault and 3095 * will never set our Modify bit again. 3096 */ 3097 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 3098 if (*pte & bit) { 3099 if (bit == VPTE_RW) { 3100 /* 3101 * We must also clear VPTE_M when clearing 3102 * VPTE_RW 3103 */ 3104 pbits = pmap_clean_pte(pte, pv->pv_pmap, 3105 pv->pv_va); 3106 if (pbits & VPTE_M) 3107 vm_page_dirty(m); 3108 } else if (bit == VPTE_M) { 3109 /* 3110 * We do not have to make the page read-only 3111 * when clearing the Modify bit. The real 3112 * kernel will make the real PTE read-only 3113 * or otherwise detect the write and set 3114 * our VPTE_M again simply by us invalidating 3115 * the real kernel VA for the pmap (as we did 3116 * above). This allows the real kernel to 3117 * handle the write fault without forwarding 3118 * the fault to us. 3119 */ 3120 atomic_clear_long(pte, VPTE_M); 3121 } else if ((bit & (VPTE_RW|VPTE_M)) == (VPTE_RW|VPTE_M)) { 3122 /* 3123 * We've been asked to clear W & M, I guess 3124 * the caller doesn't want us to update 3125 * the dirty status of the VM page. 3126 */ 3127 pmap_clean_pte(pte, pv->pv_pmap, pv->pv_va); 3128 } else { 3129 /* 3130 * We've been asked to clear bits that do 3131 * not interact with hardware. 3132 */ 3133 atomic_clear_long(pte, bit); 3134 } 3135 } 3136 } 3137 crit_exit(); 3138 } 3139 3140 /* 3141 * Lower the permission for all mappings to a given page. 3142 * 3143 * No other requirements. 3144 */ 3145 void 3146 pmap_page_protect(vm_page_t m, vm_prot_t prot) 3147 { 3148 /* JG NX support? 
*/ 3149 if ((prot & VM_PROT_WRITE) == 0) { 3150 lwkt_gettoken(&vm_token); 3151 if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { 3152 pmap_clearbit(m, VPTE_RW); 3153 vm_page_flag_clear(m, PG_WRITEABLE); 3154 } else { 3155 pmap_remove_all(m); 3156 } 3157 lwkt_reltoken(&vm_token); 3158 } 3159 } 3160 3161 vm_paddr_t 3162 pmap_phys_address(vm_pindex_t ppn) 3163 { 3164 return (x86_64_ptob(ppn)); 3165 } 3166 3167 /* 3168 * Return a count of reference bits for a page, clearing those bits. 3169 * It is not necessary for every reference bit to be cleared, but it 3170 * is necessary that 0 only be returned when there are truly no 3171 * reference bits set. 3172 * 3173 * XXX: The exact number of bits to check and clear is a matter that 3174 * should be tested and standardized at some point in the future for 3175 * optimal aging of shared pages. 3176 * 3177 * No other requirements. 3178 */ 3179 int 3180 pmap_ts_referenced(vm_page_t m) 3181 { 3182 pv_entry_t pv, pvf, pvn; 3183 pt_entry_t *pte; 3184 int rtval = 0; 3185 3186 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 3187 return (rtval); 3188 3189 crit_enter(); 3190 lwkt_gettoken(&vm_token); 3191 3192 if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 3193 3194 pvf = pv; 3195 3196 do { 3197 pvn = TAILQ_NEXT(pv, pv_list); 3198 3199 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 3200 3201 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 3202 3203 if (!pmap_track_modified(pv->pv_pmap, pv->pv_va)) 3204 continue; 3205 3206 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 3207 3208 if (pte && (*pte & VPTE_A)) { 3209 atomic_clear_long(pte, VPTE_A); 3210 rtval++; 3211 if (rtval > 4) { 3212 break; 3213 } 3214 } 3215 } while ((pv = pvn) != NULL && pv != pvf); 3216 } 3217 lwkt_reltoken(&vm_token); 3218 crit_exit(); 3219 3220 return (rtval); 3221 } 3222 3223 /* 3224 * Return whether or not the specified physical page was modified 3225 * in any physical maps. 3226 * 3227 * No other requirements. 3228 */ 3229 boolean_t 3230 pmap_is_modified(vm_page_t m) 3231 { 3232 boolean_t res; 3233 3234 lwkt_gettoken(&vm_token); 3235 res = pmap_testbit(m, VPTE_M); 3236 lwkt_reltoken(&vm_token); 3237 return (res); 3238 } 3239 3240 /* 3241 * Clear the modify bits on the specified physical page. 3242 * 3243 * No other requirements. 3244 */ 3245 void 3246 pmap_clear_modify(vm_page_t m) 3247 { 3248 lwkt_gettoken(&vm_token); 3249 pmap_clearbit(m, VPTE_M); 3250 lwkt_reltoken(&vm_token); 3251 } 3252 3253 /* 3254 * Clear the reference bit on the specified physical page. 3255 * 3256 * No other requirements. 3257 */ 3258 void 3259 pmap_clear_reference(vm_page_t m) 3260 { 3261 lwkt_gettoken(&vm_token); 3262 pmap_clearbit(m, VPTE_A); 3263 lwkt_reltoken(&vm_token); 3264 } 3265 3266 /* 3267 * Miscellaneous support routines follow 3268 */ 3269 3270 static void 3271 i386_protection_init(void) 3272 { 3273 int *kp, prot; 3274 3275 kp = protection_codes; 3276 for (prot = 0; prot < 8; prot++) { 3277 if (prot & VM_PROT_READ) 3278 *kp |= 0; /* if it's VALID is readeable */ 3279 if (prot & VM_PROT_WRITE) 3280 *kp |= VPTE_RW; 3281 if (prot & VM_PROT_EXECUTE) 3282 *kp |= 0; /* if it's VALID is executable */ 3283 ++kp; 3284 } 3285 } 3286 3287 /* 3288 * Sets the memory attribute for the specified page. 3289 */ 3290 void 3291 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) 3292 { 3293 /* This is a vkernel, do nothing */ 3294 } 3295 3296 /* 3297 * Change the PAT attribute on an existing kernel memory map. Caller 3298 * must ensure that the virtual memory in question is not accessed 3299 * during the adjustment. 
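 *
 * On the virtual kernel this is currently a no-op; memory attribute
 * handling is left to the real kernel.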
3300 */ 3301 void 3302 pmap_change_attr(vm_offset_t va, vm_size_t count, int mode) 3303 { 3304 /* This is a vkernel, do nothing */ 3305 } 3306 3307 /* 3308 * Perform the pmap work for mincore 3309 * 3310 * No other requirements. 3311 */ 3312 int 3313 pmap_mincore(pmap_t pmap, vm_offset_t addr) 3314 { 3315 pt_entry_t *ptep, pte; 3316 vm_page_t m; 3317 int val = 0; 3318 3319 lwkt_gettoken(&vm_token); 3320 ptep = pmap_pte(pmap, addr); 3321 3322 if (ptep && (pte = *ptep) != 0) { 3323 vm_paddr_t pa; 3324 3325 val = MINCORE_INCORE; 3326 if ((pte & VPTE_MANAGED) == 0) 3327 goto done; 3328 3329 pa = pte & VPTE_FRAME; 3330 3331 m = PHYS_TO_VM_PAGE(pa); 3332 3333 /* 3334 * Modified by us 3335 */ 3336 if (pte & VPTE_M) 3337 val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; 3338 /* 3339 * Modified by someone 3340 */ 3341 else if (m->dirty || pmap_is_modified(m)) 3342 val |= MINCORE_MODIFIED_OTHER; 3343 /* 3344 * Referenced by us 3345 */ 3346 if (pte & VPTE_A) 3347 val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; 3348 3349 /* 3350 * Referenced by someone 3351 */ 3352 else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) { 3353 val |= MINCORE_REFERENCED_OTHER; 3354 vm_page_flag_set(m, PG_REFERENCED); 3355 } 3356 } 3357 done: 3358 lwkt_reltoken(&vm_token); 3359 return val; 3360 } 3361 3362 /* 3363 * Replace p->p_vmspace with a new one. If adjrefs is non-zero the new 3364 * vmspace will be ref'd and the old one will be deref'd. 3365 * 3366 * Caller must hold vmspace->vm_map.token for oldvm and newvm 3367 */ 3368 void 3369 pmap_replacevm(struct proc *p, struct vmspace *newvm, int adjrefs) 3370 { 3371 struct vmspace *oldvm; 3372 struct lwp *lp; 3373 3374 crit_enter(); 3375 oldvm = p->p_vmspace; 3376 if (oldvm != newvm) { 3377 if (adjrefs) 3378 vmspace_ref(newvm); 3379 p->p_vmspace = newvm; 3380 KKASSERT(p->p_nthreads == 1); 3381 lp = RB_ROOT(&p->p_lwp_tree); 3382 pmap_setlwpvm(lp, newvm); 3383 if (adjrefs) 3384 vmspace_rel(oldvm); 3385 } 3386 crit_exit(); 3387 } 3388 3389 /* 3390 * Set the vmspace for a LWP. The vmspace is almost universally set the 3391 * same as the process vmspace, but virtual kernels need to swap out contexts 3392 * on a per-lwp basis. 3393 */ 3394 void 3395 pmap_setlwpvm(struct lwp *lp, struct vmspace *newvm) 3396 { 3397 struct vmspace *oldvm; 3398 struct pmap *pmap; 3399 3400 oldvm = lp->lwp_vmspace; 3401 if (oldvm != newvm) { 3402 crit_enter(); 3403 lp->lwp_vmspace = newvm; 3404 if (curthread->td_lwp == lp) { 3405 pmap = vmspace_pmap(newvm); 3406 ATOMIC_CPUMASK_ORBIT(pmap->pm_active, mycpu->gd_cpuid); 3407 if (pmap->pm_active_lock & CPULOCK_EXCL) 3408 pmap_interlock_wait(newvm); 3409 #if defined(SWTCH_OPTIM_STATS) 3410 tlb_flush_count++; 3411 #endif 3412 pmap = vmspace_pmap(oldvm); 3413 ATOMIC_CPUMASK_NANDBIT(pmap->pm_active, 3414 mycpu->gd_cpuid); 3415 } 3416 crit_exit(); 3417 } 3418 } 3419 3420 /* 3421 * The swtch code tried to switch in a heavy weight process whos pmap 3422 * is locked by another cpu. We have to wait for the lock to clear before 3423 * the pmap can be used. 
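 *
 * Because the virtual kernel runs as a user process, the wait loop
 * below yields with pthread_yield() rather than spinning the cpu.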
3424 */ 3425 void 3426 pmap_interlock_wait (struct vmspace *vm) 3427 { 3428 pmap_t pmap = vmspace_pmap(vm); 3429 3430 if (pmap->pm_active_lock & CPULOCK_EXCL) { 3431 crit_enter(); 3432 while (pmap->pm_active_lock & CPULOCK_EXCL) { 3433 cpu_ccfence(); 3434 pthread_yield(); 3435 } 3436 crit_exit(); 3437 } 3438 } 3439 3440 vm_offset_t 3441 pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) 3442 { 3443 3444 if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { 3445 return addr; 3446 } 3447 3448 addr = roundup2(addr, NBPDR); 3449 return addr; 3450 } 3451 3452 /* 3453 * Used by kmalloc/kfree, page already exists at va 3454 */ 3455 vm_page_t 3456 pmap_kvtom(vm_offset_t va) 3457 { 3458 vpte_t *ptep; 3459 3460 KKASSERT(va >= KvaStart && va < KvaEnd); 3461 ptep = vtopte(va); 3462 return(PHYS_TO_VM_PAGE(*ptep & PG_FRAME)); 3463 } 3464 3465 void 3466 pmap_object_init(vm_object_t object) 3467 { 3468 /* empty */ 3469 } 3470 3471 void 3472 pmap_object_free(vm_object_t object) 3473 { 3474 /* empty */ 3475 } 3476