/*
 * (MPSAFE)
 *
 * Copyright (c) 1991 Regents of the University of California.
 * Copyright (c) 1994 John S. Dyson
 * Copyright (c) 1994 David Greenman
 * Copyright (c) 2003 Peter Wemm
 * Copyright (c) 2005-2008 Alan L. Cox <alc@cs.rice.edu>
 * Copyright (c) 2008, 2009 The DragonFly Project.
 * Copyright (c) 2008, 2009 Jordan Gordeev.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from:   @(#)pmap.c	7.7 (Berkeley)	5/12/91
 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $
 */

/*
 * Manages physical address maps.
 */

#if JG
#include "opt_pmap.h"
#endif
#include "opt_msgbuf.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/msgbuf.h>
#include <sys/vmmeter.h>
#include <sys/mman.h>
#include <sys/vmspace.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_zone.h>

#include <sys/user.h>
#include <sys/thread2.h>
#include <sys/sysref2.h>
#include <sys/spinlock2.h>
#include <vm/vm_page2.h>

#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <machine/smp.h>
#include <machine/globaldata.h>
#include <machine/pmap.h>
#include <machine/pmap_inval.h>

#include <ddb/ddb.h>

#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <pthread.h>

#define PMAP_KEEP_PDIRS
#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC 1000
#endif

#if defined(DIAGNOSTIC)
#define PMAP_DIAGNOSTIC
#endif

#define MINPV 2048

#if !defined(PMAP_DIAGNOSTIC)
#define PMAP_INLINE __inline
#else
#define PMAP_INLINE
#endif

/*
 * Get PDEs and PTEs for user/kernel address space
 */
static pd_entry_t *pmap_pde(pmap_t pmap, vm_offset_t va);
#define pdir_pde(m, v)	(m[(vm_offset_t)(v) >> PDRSHIFT])

#define pmap_pde_v(pte)		((*(pd_entry_t *)pte & VPTE_V) != 0)
#define pmap_pte_w(pte)		((*(pt_entry_t *)pte & VPTE_WIRED) != 0)
#define pmap_pte_m(pte)		((*(pt_entry_t *)pte & VPTE_M) != 0)
#define pmap_pte_u(pte)		((*(pt_entry_t *)pte & VPTE_A) != 0)
#define pmap_pte_v(pte)		((*(pt_entry_t *)pte & VPTE_V) != 0)

/*
 * Given a map and a machine independent protection code,
 * convert to a vax protection code.
 */
#define pte_prot(m, p)		\
	(protection_codes[p & (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)])
static int protection_codes[8];

struct pmap kernel_pmap;

static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */

static vm_object_t kptobj;

static int nkpt;

static uint64_t	KPDphys;	/* phys addr of kernel level 2 */
uint64_t	KPDPphys;	/* phys addr of kernel level 3 */
uint64_t	KPML4phys;	/* phys addr of kernel level 4 */

extern int vmm_enabled;
extern void *vkernel_stack;

/*
 * Data for the pv entry allocation mechanism
 */
static vm_zone_t pvzone;
static struct vm_zone pvzone_store;
static struct vm_object pvzone_obj;
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
static int pmap_pagedaemon_waken = 0;
static struct pv_entry *pvinit;

/*
 * All those kernel PT submaps that BSD is so fond of
 */
pt_entry_t *CMAP1 = NULL, *ptmmap;
caddr_t CADDR1 = NULL;
static pt_entry_t *msgbufmap;

uint64_t KPTphys;

static PMAP_INLINE void	free_pv_entry (pv_entry_t pv);
static pv_entry_t get_pv_entry (void);
static void	i386_protection_init (void);
static __inline void	pmap_clearbit (vm_page_t m, int bit);

static void	pmap_remove_all (vm_page_t m);
static int	pmap_remove_pte (struct pmap *pmap, pt_entry_t *ptq,
				 vm_offset_t sva);
static void	pmap_remove_page (struct pmap *pmap, vm_offset_t va);
static int	pmap_remove_entry (struct pmap *pmap, vm_page_t m,
				   vm_offset_t va);
static boolean_t pmap_testbit (vm_page_t m, int bit);
static void	pmap_insert_entry (pmap_t pmap, vm_offset_t va,
				   vm_page_t mpte, vm_page_t m);

static vm_page_t pmap_allocpte (pmap_t pmap, vm_offset_t va);

static int	pmap_release_free_page (pmap_t pmap, vm_page_t p);
static vm_page_t _pmap_allocpte (pmap_t pmap, vm_pindex_t ptepindex);
#if JGPMAP32
static pt_entry_t *pmap_pte_quick (pmap_t pmap, vm_offset_t va);
#endif
static vm_page_t pmap_page_lookup (vm_object_t object, vm_pindex_t pindex);
static int	pmap_unuse_pt (pmap_t, vm_offset_t, vm_page_t);

/*
 * pmap_pte_quick:
 *
 *	Super fast pmap_pte routine best used when scanning the pv lists.
 *	This eliminates many coarse-grained invltlb calls.  Note that many of
 *	the pv list scans are across different pmaps and it is very wasteful
 *	to do an entire invltlb when checking a single mapping.
 *
 *	Should only be called while in a critical section.
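 *
 *	In this vkernel pmap the routine is simply a wrapper around
 *	pmap_pte(); see the JGPMAP32 block below.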
 */
#if JGPMAP32
static __inline pt_entry_t *pmap_pte(pmap_t pmap, vm_offset_t va);

static pt_entry_t *
pmap_pte_quick(pmap_t pmap, vm_offset_t va)
{
	return pmap_pte(pmap, va);
}
#endif

/* Return a non-clipped PD index for a given VA */
static __inline vm_pindex_t
pmap_pde_pindex(vm_offset_t va)
{
	return va >> PDRSHIFT;
}

/* Return various clipped indexes for a given VA */
static __inline vm_pindex_t
pmap_pte_index(vm_offset_t va)
{
	return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1));
}

static __inline vm_pindex_t
pmap_pde_index(vm_offset_t va)
{
	return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
}

static __inline vm_pindex_t
pmap_pdpe_index(vm_offset_t va)
{
	return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1));
}

static __inline vm_pindex_t
pmap_pml4e_index(vm_offset_t va)
{
	return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1));
}

/* Return a pointer to the PML4 slot that corresponds to a VA */
static __inline pml4_entry_t *
pmap_pml4e(pmap_t pmap, vm_offset_t va)
{
	return (&pmap->pm_pml4[pmap_pml4e_index(va)]);
}

/* Return a pointer to the PDP slot that corresponds to a VA */
static __inline pdp_entry_t *
pmap_pml4e_to_pdpe(pml4_entry_t *pml4e, vm_offset_t va)
{
	pdp_entry_t *pdpe;

	pdpe = (pdp_entry_t *)PHYS_TO_DMAP(*pml4e & VPTE_FRAME);
	return (&pdpe[pmap_pdpe_index(va)]);
}

/* Return a pointer to the PDP slot that corresponds to a VA */
static __inline pdp_entry_t *
pmap_pdpe(pmap_t pmap, vm_offset_t va)
{
	pml4_entry_t *pml4e;

	pml4e = pmap_pml4e(pmap, va);
	if ((*pml4e & VPTE_V) == 0)
		return NULL;
	return (pmap_pml4e_to_pdpe(pml4e, va));
}

/* Return a pointer to the PD slot that corresponds to a VA */
static __inline pd_entry_t *
pmap_pdpe_to_pde(pdp_entry_t *pdpe, vm_offset_t va)
{
	pd_entry_t *pde;

	pde = (pd_entry_t *)PHYS_TO_DMAP(*pdpe & VPTE_FRAME);
	return (&pde[pmap_pde_index(va)]);
}

/* Return a pointer to the PD slot that corresponds to a VA */
static __inline pd_entry_t *
pmap_pde(pmap_t pmap, vm_offset_t va)
{
	pdp_entry_t *pdpe;

	pdpe = pmap_pdpe(pmap, va);
	if (pdpe == NULL || (*pdpe & VPTE_V) == 0)
		return NULL;
	return (pmap_pdpe_to_pde(pdpe, va));
}

/* Return a pointer to the PT slot that corresponds to a VA */
static __inline pt_entry_t *
pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va)
{
	pt_entry_t *pte;

	pte = (pt_entry_t *)PHYS_TO_DMAP(*pde & VPTE_FRAME);
	return (&pte[pmap_pte_index(va)]);
}

/* Return a pointer to the PT slot that corresponds to a VA */
static __inline pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *pde;

	pde = pmap_pde(pmap, va);
	if (pde == NULL || (*pde & VPTE_V) == 0)
		return NULL;
	if ((*pde & VPTE_PS) != 0)	/* compat with i386 pmap_pte() */
		return ((pt_entry_t *)pde);
	return (pmap_pde_to_pte(pde, va));
}


#if JGV
PMAP_INLINE pt_entry_t *
vtopte(vm_offset_t va)
{
	uint64_t mask = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT +
				  NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1);

	return (PTmap + ((va >> PAGE_SHIFT) & mask));
}

static __inline pd_entry_t *
vtopde(vm_offset_t va)
{
	uint64_t mask = ((1ul << (NPDEPGSHIFT + NPDPEPGSHIFT +
				  NPML4EPGSHIFT)) - 1);

	return (PDmap + ((va >> PDRSHIFT) & mask));
}
#else
static PMAP_INLINE pt_entry_t *
vtopte(vm_offset_t va)
{
	pt_entry_t *x;

	x = pmap_pte(&kernel_pmap, va);
	assert(x != NULL);
	return x;
}

static __inline pd_entry_t *
vtopde(vm_offset_t va)
{
	pd_entry_t *x;

	x = pmap_pde(&kernel_pmap, va);
	assert(x != NULL);
	return x;
}
#endif

static uint64_t
allocpages(vm_paddr_t *firstaddr, int n)
{
	uint64_t ret;

	ret = *firstaddr;
#if JGV
	bzero((void *)ret, n * PAGE_SIZE);
#endif
	*firstaddr += n * PAGE_SIZE;
	return (ret);
}

static void
create_dmap_vmm(vm_paddr_t *firstaddr)
{
	void *stack_addr;
	int pml4_stack_index;
	int pdp_stack_index;
	int pd_stack_index;
	long i, j;
	int regs[4];
	int amd_feature;

	uint64_t KPDP_DMAP_phys = allocpages(firstaddr, NDMPML4E);
	uint64_t KPDP_VSTACK_phys = allocpages(firstaddr, 1);
	uint64_t KPD_VSTACK_phys = allocpages(firstaddr, 1);

	pml4_entry_t *KPML4virt = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys);
	pdp_entry_t *KPDP_DMAP_virt =
		(pdp_entry_t *)PHYS_TO_DMAP(KPDP_DMAP_phys);
	pdp_entry_t *KPDP_VSTACK_virt =
		(pdp_entry_t *)PHYS_TO_DMAP(KPDP_VSTACK_phys);
	pd_entry_t *KPD_VSTACK_virt =
		(pd_entry_t *)PHYS_TO_DMAP(KPD_VSTACK_phys);

	bzero(KPDP_DMAP_virt, NDMPML4E * PAGE_SIZE);
	bzero(KPDP_VSTACK_virt, 1 * PAGE_SIZE);
	bzero(KPD_VSTACK_virt, 1 * PAGE_SIZE);

	do_cpuid(0x80000001, regs);
	amd_feature = regs[3];

	/* Build the mappings for the first 512GB */
	if (amd_feature & AMDID_PAGE1GB) {
		/* In pages of 1 GB, if supported */
		for (i = 0; i < NPDPEPG; i++) {
			KPDP_DMAP_virt[i] = ((uint64_t)i << PDPSHIFT);
			KPDP_DMAP_virt[i] |= VPTE_RW | VPTE_V | VPTE_PS |
					     VPTE_U;
		}
	} else {
		/* Otherwise, in pages of 2 MB */
		for (i = 0; i < NPDPEPG; i++) {
			uint64_t KPD_DMAP_phys = allocpages(firstaddr, 1);
			pd_entry_t *KPD_DMAP_virt =
				(pd_entry_t *)PHYS_TO_DMAP(KPD_DMAP_phys);

			bzero(KPD_DMAP_virt, PAGE_SIZE);

			KPDP_DMAP_virt[i] = KPD_DMAP_phys;
			KPDP_DMAP_virt[i] |= VPTE_RW | VPTE_V | VPTE_U;

			/* For each PD, we have to allocate NPTEPG PT */
			for (j = 0; j < NPTEPG; j++) {
				KPD_DMAP_virt[j] = (i << PDPSHIFT) |
						   (j << PDRSHIFT);
				KPD_DMAP_virt[j] |= VPTE_RW | VPTE_V |
						    VPTE_PS | VPTE_U;
			}
		}
	}

	/* DMAP for the first 512G */
	KPML4virt[0] = KPDP_DMAP_phys;
	KPML4virt[0] |= VPTE_RW | VPTE_V | VPTE_U;

	/* create a 2 MB map of the new stack */
	pml4_stack_index = (uint64_t)&stack_addr >> PML4SHIFT;
	KPML4virt[pml4_stack_index] = KPDP_VSTACK_phys;
	KPML4virt[pml4_stack_index] |= VPTE_RW | VPTE_V | VPTE_U;

	pdp_stack_index = ((uint64_t)&stack_addr & PML4MASK) >> PDPSHIFT;
	KPDP_VSTACK_virt[pdp_stack_index] = KPD_VSTACK_phys;
	KPDP_VSTACK_virt[pdp_stack_index] |= VPTE_RW | VPTE_V | VPTE_U;

	pd_stack_index = ((uint64_t)&stack_addr & PDPMASK) >> PDRSHIFT;
	KPD_VSTACK_virt[pd_stack_index] = (uint64_t)vkernel_stack;
	KPD_VSTACK_virt[pd_stack_index] |= VPTE_RW | VPTE_V | VPTE_U | VPTE_PS;
}

static void
create_pagetables(vm_paddr_t *firstaddr, int64_t ptov_offset)
{
	int i;
	pml4_entry_t *KPML4virt;
	pdp_entry_t *KPDPvirt;
	pd_entry_t *KPDvirt;
	pt_entry_t *KPTvirt;
	int kpml4i = pmap_pml4e_index(ptov_offset);
	int kpdpi = pmap_pdpe_index(ptov_offset);
	int kpdi = pmap_pde_index(ptov_offset);

	/*
	 * Calculate NKPT - number of kernel page tables.  We have to
	 * accommodate preallocation of the vm_page_array, dump bitmap,
	 * MSGBUF_SIZE, and other stuff.  Be generous.
	 *
	 * Maxmem is in pages.
	 */
	nkpt = (Maxmem * (sizeof(struct vm_page) * 2) + MSGBUF_SIZE) / NBPDR;

	/*
	 * Allocate pages
	 */
	KPML4phys = allocpages(firstaddr, 1);
	KPDPphys = allocpages(firstaddr, NKPML4E);
	KPDphys = allocpages(firstaddr, NKPDPE);
	KPTphys = allocpages(firstaddr, nkpt);

	KPML4virt = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys);
	KPDPvirt = (pdp_entry_t *)PHYS_TO_DMAP(KPDPphys);
	KPDvirt = (pd_entry_t *)PHYS_TO_DMAP(KPDphys);
	KPTvirt = (pt_entry_t *)PHYS_TO_DMAP(KPTphys);

	bzero(KPML4virt, 1 * PAGE_SIZE);
	bzero(KPDPvirt, NKPML4E * PAGE_SIZE);
	bzero(KPDvirt, NKPDPE * PAGE_SIZE);
	bzero(KPTvirt, nkpt * PAGE_SIZE);

	/* Now map the page tables at their location within PTmap */
	for (i = 0; i < nkpt; i++) {
		KPDvirt[i + kpdi] = KPTphys + (i << PAGE_SHIFT);
		KPDvirt[i + kpdi] |= VPTE_RW | VPTE_V | VPTE_U;
	}

	/* And connect up the PD to the PDP */
	for (i = 0; i < NKPDPE; i++) {
		KPDPvirt[i + kpdpi] = KPDphys + (i << PAGE_SHIFT);
		KPDPvirt[i + kpdpi] |= VPTE_RW | VPTE_V | VPTE_U;
	}

	/* And recursively map PML4 to itself in order to get PTmap */
	KPML4virt[PML4PML4I] = KPML4phys;
	KPML4virt[PML4PML4I] |= VPTE_RW | VPTE_V | VPTE_U;

	/* Connect the KVA slot up to the PML4 */
	KPML4virt[kpml4i] = KPDPphys;
	KPML4virt[kpml4i] |= VPTE_RW | VPTE_V | VPTE_U;
}

/*
 * Typically used to initialize a fictitious page by vm/device_pager.c
 */
void
pmap_page_init(struct vm_page *m)
{
	vm_page_init(m);
	TAILQ_INIT(&m->md.pv_list);
}

/*
 * Bootstrap the system enough to run with virtual memory.
 *
 * On the i386 this is called after mapping has already been enabled
 * and just syncs the pmap module with what has already been done.
 * [We can't call it easily with mapping off since the kernel is not
 * mapped with PA == VA, hence we would have to relocate every address
 * from the linked base (virtual) address "KERNBASE" to the actual
 * (physical) address starting relative to 0]
 */
void
pmap_bootstrap(vm_paddr_t *firstaddr, int64_t ptov_offset)
{
	vm_offset_t va;
	pt_entry_t *pte;

	/*
	 * Create an initial set of page tables to run the kernel in.
	 */
	create_pagetables(firstaddr, ptov_offset);

	/* Create the DMAP for the VMM */
	if (vmm_enabled) {
		create_dmap_vmm(firstaddr);
	}

	virtual_start = KvaStart;
	virtual_end = KvaEnd;

	/*
	 * Initialize protection array.
	 */
	i386_protection_init();

	/*
	 * The kernel's pmap is statically allocated so we don't have to use
	 * pmap_create, which is unlikely to work correctly at this part of
	 * the boot sequence (XXX and which no longer exists).
	 *
	 * The kernel_pmap's pm_pteobj is used only for locking and not
	 * for mmu pages.
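	 * It is hooked up to kptobj later, in pmap_init().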
	 */
	kernel_pmap.pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys);
	kernel_pmap.pm_count = 1;
	/* don't allow deactivation */
	CPUMASK_ASSALLONES(kernel_pmap.pm_active);
	kernel_pmap.pm_pteobj = NULL;	/* see pmap_init */
	TAILQ_INIT(&kernel_pmap.pm_pvlist);
	TAILQ_INIT(&kernel_pmap.pm_pvlist_free);
	lwkt_token_init(&kernel_pmap.pm_token, "kpmap_tok");
	spin_init(&kernel_pmap.pm_spin, "pmapbootstrap");

	/*
	 * Reserve some special page table entries/VA space for temporary
	 * mapping of pages.
	 */
#define	SYSMAP(c, p, v, n)	\
	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);

	va = virtual_start;
	pte = pmap_pte(&kernel_pmap, va);

	/*
	 * CMAP1/CMAP2 are used for zeroing and copying pages.
	 */
	SYSMAP(caddr_t, CMAP1, CADDR1, 1)

#if JGV
	/*
	 * Crashdump maps.
	 */
	SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS);
#endif

	/*
	 * ptvmmap is used for reading arbitrary physical pages via
	 * /dev/mem.
	 */
	SYSMAP(caddr_t, ptmmap, ptvmmap, 1)

	/*
	 * msgbufp is used to map the system message buffer.
	 * XXX msgbufmap is not used.
	 */
	SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
	       atop(round_page(MSGBUF_SIZE)))

	virtual_start = va;

	*CMAP1 = 0;
	/* Not ready to do an invltlb yet for VMM */
	if (!vmm_enabled)
		cpu_invltlb();
}

/*
 * Initialize the pmap module.
 * Called by vm_init, to initialize any structures that the pmap
 * system needs to map virtual memory.
 * pmap_init has been enhanced to support discontiguous physical
 * memory in a fairly consistent way.
 */
void
pmap_init(void)
{
	int i;
	int initial_pvs;

	/*
	 * object for kernel page table pages
	 */
	/* JG I think the number can be arbitrary */
	kptobj = vm_object_allocate(OBJT_DEFAULT, 5);
	kernel_pmap.pm_pteobj = kptobj;

	/*
	 * Allocate memory for random pmap data structures.  Includes the
	 * pv_head_table.
	 */
	for (i = 0; i < vm_page_array_size; i++) {
		vm_page_t m;

		m = &vm_page_array[i];
		TAILQ_INIT(&m->md.pv_list);
		m->md.pv_list_count = 0;
	}

	/*
	 * init the pv free list
	 */
	initial_pvs = vm_page_array_size;
	if (initial_pvs < MINPV)
		initial_pvs = MINPV;
	pvzone = &pvzone_store;
	pvinit = (struct pv_entry *)
		kmem_alloc(&kernel_map,
			   initial_pvs * sizeof (struct pv_entry),
			   VM_SUBSYS_PVENTRY);
	zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit,
		  initial_pvs);

	/*
	 * Now it is safe to enable pv_table recording.
	 */
	pmap_initialized = TRUE;
}

/*
 * Initialize the address space (zone) for the pv_entries.  Set a
 * high water mark so that the system can recover from excessive
 * numbers of pv entries.
 */
void
pmap_init2(void)
{
	int shpgperproc = PMAP_SHPGPERPROC;

	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
	pv_entry_high_water = 9 * (pv_entry_max / 10);
	zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT);
}


/***************************************************
 * Low level helper routines.....
 ***************************************************/

/*
 * The modification bit is not tracked for any pages in this range.  XXX
 * such pages in this map should always use pmap_k*() functions and not
 * be managed anyhow.
 *
 * XXX User and kernel address spaces are independent for virtual kernels,
 * this function only applies to the kernel pmap.
 */
static int
pmap_track_modified(pmap_t pmap, vm_offset_t va)
{
	if (pmap != &kernel_pmap)
		return 1;
	if ((va < clean_sva) || (va >= clean_eva))
		return 1;
	else
		return 0;
}

/*
 * Extract the physical page address associated with the map/VA pair.
 *
 * No requirements.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t rtval;
	pt_entry_t *pte;
	pd_entry_t pde, *pdep;

	lwkt_gettoken(&vm_token);
	rtval = 0;
	pdep = pmap_pde(pmap, va);
	if (pdep != NULL) {
		pde = *pdep;
		if (pde) {
			if ((pde & VPTE_PS) != 0) {
				/* JGV */
				rtval = (pde & PG_PS_FRAME) | (va & PDRMASK);
			} else {
				pte = pmap_pde_to_pte(pdep, va);
				rtval = (*pte & VPTE_FRAME) | (va & PAGE_MASK);
			}
		}
	}
	lwkt_reltoken(&vm_token);
	return rtval;
}

/*
 * Similar to extract but checks protections, SMP-friendly short-cut for
 * vm_fault_page[_quick]().
 */
vm_page_t
pmap_fault_page_quick(pmap_t pmap __unused, vm_offset_t vaddr __unused,
		      vm_prot_t prot __unused)
{
	return(NULL);
}

/*
 * Routine:	pmap_kextract
 * Function:
 *	Extract the physical page address associated with the given
 *	kernel virtual address.
 */
vm_paddr_t
pmap_kextract(vm_offset_t va)
{
	pd_entry_t pde;
	vm_paddr_t pa;

	KKASSERT(va >= KvaStart && va < KvaEnd);

	/*
	 * The DMAP region is not included in [KvaStart, KvaEnd)
	 */
#if 0
	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
		pa = DMAP_TO_PHYS(va);
	} else {
#endif
		pde = *vtopde(va);
		if (pde & VPTE_PS) {
			/* JGV */
			pa = (pde & PG_PS_FRAME) | (va & PDRMASK);
		} else {
			/*
			 * Beware of a concurrent promotion that changes the
			 * PDE at this point!  For example, vtopte() must not
			 * be used to access the PTE because it would use the
			 * new PDE.  It is, however, safe to use the old PDE
			 * because the page table page is preserved by the
			 * promotion.
			 */
			pa = *pmap_pde_to_pte(&pde, va);
			pa = (pa & VPTE_FRAME) | (va & PAGE_MASK);
		}
#if 0
	}
#endif
	return pa;
}

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

/*
 * Enter a mapping into kernel_pmap.  Mappings created in this fashion
 * are not managed.  Mappings must be immediately accessible on all cpus.
 *
 * Call pmap_inval_pte() to invalidate the virtual pte and clean out the
 * real pmap and handle related races before storing the new vpte.  The
 * new semantics for kenter require us to do an UNCONDITIONAL invalidation,
 * because the entry may have previously been cleared without an invalidation.
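 * (The *_noinval variants below are examples of paths that can leave a
 * pte modified without any invalidation.)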
 */
void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{
	pt_entry_t *pte;
	pt_entry_t npte;

	KKASSERT(va >= KvaStart && va < KvaEnd);
	npte = pa | VPTE_RW | VPTE_V | VPTE_U;
	pte = vtopte(va);

#if 1
	*pte = 0;
	pmap_inval_pte(pte, &kernel_pmap, va);
#else
	if (*pte & VPTE_V)
		pmap_inval_pte(pte, &kernel_pmap, va);
#endif
	*pte = npte;
}

/*
 * Enter an unmanaged KVA mapping for the private use of the current
 * cpu only.
 *
 * It is illegal for the mapping to be accessed by other cpus without
 * proper invalidation.
 */
int
pmap_kenter_quick(vm_offset_t va, vm_paddr_t pa)
{
	pt_entry_t *ptep;
	pt_entry_t npte;
	int res;

	KKASSERT(va >= KvaStart && va < KvaEnd);

	npte = (vpte_t)pa | VPTE_RW | VPTE_V | VPTE_U;
	ptep = vtopte(va);
#if 1
	res = 1;
#else
	/* FUTURE */
	res = (*ptep != 0);
#endif

	if (*ptep & VPTE_V)
		pmap_inval_pte_quick(ptep, &kernel_pmap, va);
	*ptep = npte;

	return res;
}

int
pmap_kenter_noinval(vm_offset_t va, vm_paddr_t pa)
{
	pt_entry_t *ptep;
	pt_entry_t npte;
	int res;

	KKASSERT(va >= KvaStart && va < KvaEnd);

	npte = (vpte_t)pa | VPTE_RW | VPTE_V | VPTE_U;
	ptep = vtopte(va);
#if 1
	res = 1;
#else
	/* FUTURE */
	res = (*ptep != 0);
#endif

	*ptep = npte;

	return res;
}

/*
 * Remove an unmanaged mapping created with pmap_kenter*().
 */
void
pmap_kremove(vm_offset_t va)
{
	pt_entry_t *pte;

	KKASSERT(va >= KvaStart && va < KvaEnd);

	pte = vtopte(va);
	*pte = 0;
	pmap_inval_pte(pte, &kernel_pmap, va);
}

/*
 * Remove an unmanaged mapping created with pmap_kenter*() but synchronize
 * only with this cpu.
 *
 * Unfortunately because we optimize new entries by testing VPTE_V later
 * on, we actually still have to synchronize with all the cpus.  XXX maybe
 * store a junk value and test against 0 in the other places instead?
 */
void
pmap_kremove_quick(vm_offset_t va)
{
	pt_entry_t *pte;

	KKASSERT(va >= KvaStart && va < KvaEnd);

	pte = vtopte(va);
	*pte = 0;
	pmap_inval_pte(pte, &kernel_pmap, va); /* NOT _quick */
}

void
pmap_kremove_noinval(vm_offset_t va)
{
	pt_entry_t *pte;

	KKASSERT(va >= KvaStart && va < KvaEnd);

	pte = vtopte(va);
	*pte = 0;
}

/*
 * Used to map a range of physical addresses into kernel
 * virtual address space.
 *
 * For now, VM is already on, we only need to map the
 * specified memory.
 */
vm_offset_t
pmap_map(vm_offset_t *virtp, vm_paddr_t start, vm_paddr_t end, int prot)
{
	return PHYS_TO_DMAP(start);
}

/*
 * Map a set of unmanaged VM pages into KVM.
 */
void
pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
{
	vm_offset_t end_va;

	end_va = va + count * PAGE_SIZE;
	KKASSERT(va >= KvaStart && end_va < KvaEnd);

	while (va < end_va) {
		pt_entry_t *pte;

		pte = vtopte(va);
		*pte = 0;
		pmap_inval_pte(pte, &kernel_pmap, va);
		*pte = VM_PAGE_TO_PHYS(*m) | VPTE_RW | VPTE_V | VPTE_U;
		va += PAGE_SIZE;
		m++;
	}
}

/*
 * Undo the effects of pmap_qenter*().
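 *
 * The variants below differ only in synchronization: pmap_qremove()
 * invalidates on all cpus, pmap_qremove_quick() only invalidates the
 * local cpu, and pmap_qremove_noinval() does no invalidation at all.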
 */
void
pmap_qremove(vm_offset_t va, int count)
{
	vm_offset_t end_va;

	end_va = va + count * PAGE_SIZE;
	KKASSERT(va >= KvaStart && end_va < KvaEnd);

	while (va < end_va) {
		pt_entry_t *pte;

		pte = vtopte(va);
		atomic_swap_long(pte, 0);
		pmap_inval_pte(pte, &kernel_pmap, va);
		va += PAGE_SIZE;
	}
}

void
pmap_qremove_quick(vm_offset_t va, int count)
{
	vm_offset_t end_va;

	end_va = va + count * PAGE_SIZE;
	KKASSERT(va >= KvaStart && end_va < KvaEnd);

	while (va < end_va) {
		pt_entry_t *pte;

		pte = vtopte(va);
		atomic_swap_long(pte, 0);
		cpu_invlpg((void *)va);
		va += PAGE_SIZE;
	}
}

void
pmap_qremove_noinval(vm_offset_t va, int count)
{
	vm_offset_t end_va;

	end_va = va + count * PAGE_SIZE;
	KKASSERT(va >= KvaStart && end_va < KvaEnd);

	while (va < end_va) {
		pt_entry_t *pte;

		pte = vtopte(va);
		atomic_swap_long(pte, 0);
		va += PAGE_SIZE;
	}
}

/*
 * This routine works like vm_page_lookup() but also blocks as long as the
 * page is busy.  This routine does not busy the page it returns.
 *
 * Unless the caller is managing objects whose pages are in a known state,
 * the call should be made with a critical section held so the page's object
 * association remains valid on return.
 */
static vm_page_t
pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

	ASSERT_LWKT_TOKEN_HELD(vm_object_token(object));
	m = vm_page_lookup_busy_wait(object, pindex, FALSE, "pplookp");

	return(m);
}

/*
 * Create a new thread and optionally associate it with a (new) process.
 * NOTE! the new thread's cpu may not equal the current cpu.
 */
void
pmap_init_thread(thread_t td)
{
	/* enforce pcb placement */
	td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_size) - 1;
	td->td_savefpu = &td->td_pcb->pcb_save;
	td->td_sp = (char *)td->td_pcb - 16; /* JG is -16 needed on x86_64? */
}

/*
 * This routine directly affects the fork perf for a process.
 */
void
pmap_init_proc(struct proc *p)
{
}

/***************************************************
 * Page table page management routines.....
 ***************************************************/

static __inline int pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va,
					 vm_page_t m);

/*
 * This routine unholds page table pages, and if the hold count
 * drops to zero, then it decrements the wire count.
 *
 * We must recheck that this is the last hold reference after busy-sleeping
 * on the page.
 */
static int
_pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	vm_page_busy_wait(m, FALSE, "pmuwpt");
	KASSERT(m->queue == PQ_NONE,
		("_pmap_unwire_pte_hold: %p->queue != PQ_NONE", m));

	if (m->hold_count == 1) {
		/*
		 * Unmap the page table page.
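		 *
		 * The pindex encodes the level of the page table page:
		 * PT pages occupy [0, NUPDE), PD pages occupy
		 * [NUPDE, NUPDE + NUPDPE), and PDP pages lie above
		 * NUPDE + NUPDPE, which is what the tests below key off.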
		 */
		//abort(); /* JG */
		/* pmap_inval_add(info, pmap, -1); */

		if (m->pindex >= (NUPDE + NUPDPE)) {
			/* PDP page */
			pml4_entry_t *pml4;
			pml4 = pmap_pml4e(pmap, va);
			*pml4 = 0;
		} else if (m->pindex >= NUPDE) {
			/* PD page */
			pdp_entry_t *pdp;
			pdp = pmap_pdpe(pmap, va);
			*pdp = 0;
		} else {
			/* PT page */
			pd_entry_t *pd;
			pd = pmap_pde(pmap, va);
			*pd = 0;
		}

		KKASSERT(pmap->pm_stats.resident_count > 0);
		--pmap->pm_stats.resident_count;

		if (pmap->pm_ptphint == m)
			pmap->pm_ptphint = NULL;

		if (m->pindex < NUPDE) {
			/* We just released a PT, unhold the matching PD */
			vm_page_t pdpg;

			pdpg = PHYS_TO_VM_PAGE(*pmap_pdpe(pmap, va) &
					       VPTE_FRAME);
			pmap_unwire_pte_hold(pmap, va, pdpg);
		}
		if (m->pindex >= NUPDE && m->pindex < (NUPDE + NUPDPE)) {
			/* We just released a PD, unhold the matching PDP */
			vm_page_t pdppg;

			pdppg = PHYS_TO_VM_PAGE(*pmap_pml4e(pmap, va) &
						VPTE_FRAME);
			pmap_unwire_pte_hold(pmap, va, pdppg);
		}

		/*
		 * This was our last hold, the page had better be unwired
		 * after we decrement wire_count.
		 *
		 * FUTURE NOTE: shared page directory page could result in
		 * multiple wire counts.
		 */
		vm_page_unhold(m);
		--m->wire_count;
		KKASSERT(m->wire_count == 0);
		atomic_add_int(&vmstats.v_wire_count, -1);
		vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
		vm_page_flash(m);
		vm_page_free_zero(m);
		return 1;
	} else {
		KKASSERT(m->hold_count > 1);
		vm_page_unhold(m);
		vm_page_wakeup(m);
		return 0;
	}
}

static __inline int
pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	KKASSERT(m->hold_count > 0);
	if (m->hold_count > 1) {
		vm_page_unhold(m);
		return 0;
	} else {
		return _pmap_unwire_pte_hold(pmap, va, m);
	}
}

/*
 * After removing a page table entry, this routine is used to
 * conditionally free the page, and manage the hold/wire counts.
 */
static int
pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
{
	/* JG Use FreeBSD/amd64 or FreeBSD/i386 ptepde approaches? */
	vm_pindex_t ptepindex;

	ASSERT_LWKT_TOKEN_HELD(vm_object_token(pmap->pm_pteobj));

	if (mpte == NULL) {
		/*
		 * page table pages in the kernel_pmap are not managed.
		 */
		if (pmap == &kernel_pmap)
			return(0);
		ptepindex = pmap_pde_pindex(va);
		if (pmap->pm_ptphint &&
		    (pmap->pm_ptphint->pindex == ptepindex)) {
			mpte = pmap->pm_ptphint;
		} else {
			mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex);
			pmap->pm_ptphint = mpte;
			vm_page_wakeup(mpte);
		}
	}

	return pmap_unwire_pte_hold(pmap, va, mpte);
}

/*
 * Initialize pmap0/vmspace0.  Since process 0 never enters user mode we
 * just dummy it up so it works well enough for fork().
 *
 * In DragonFly, process pmaps may only be used to manipulate user address
 * space, never kernel address space.
 */
void
pmap_pinit0(struct pmap *pmap)
{
	pmap_pinit(pmap);
}

/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
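 *
 * The PML4 page itself is allocated here (or reused from pm_pdirm if
 * one is still cached) and mapped into KVA with pmap_kenter().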
 */
void
pmap_pinit(struct pmap *pmap)
{
	vm_page_t ptdpg;

	/*
	 * No need to allocate page table space yet but we do need a valid
	 * page directory table.
	 */
	if (pmap->pm_pml4 == NULL) {
		pmap->pm_pml4 = (pml4_entry_t *)
			kmem_alloc_pageable(&kernel_map, PAGE_SIZE,
					    VM_SUBSYS_PML4);
	}

	/*
	 * Allocate an object for the ptes
	 */
	if (pmap->pm_pteobj == NULL) {
		pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT,
						     NUPDE + NUPDPE +
						     PML4PML4I + 1);
	}

	/*
	 * Allocate the page directory page, unless we already have
	 * one cached.  If we used the cached page the wire_count will
	 * already be set appropriately.
	 */
	if ((ptdpg = pmap->pm_pdirm) == NULL) {
		ptdpg = vm_page_grab(pmap->pm_pteobj,
				     NUPDE + NUPDPE + PML4PML4I,
				     VM_ALLOC_NORMAL | VM_ALLOC_RETRY |
				     VM_ALLOC_ZERO);
		pmap->pm_pdirm = ptdpg;
		vm_page_flag_clear(ptdpg, PG_MAPPED);
		vm_page_wire(ptdpg);
		vm_page_wakeup(ptdpg);
		pmap_kenter((vm_offset_t)pmap->pm_pml4,
			    VM_PAGE_TO_PHYS(ptdpg));
	}
	pmap->pm_count = 1;
	CPUMASK_ASSZERO(pmap->pm_active);
	pmap->pm_ptphint = NULL;
	TAILQ_INIT(&pmap->pm_pvlist);
	TAILQ_INIT(&pmap->pm_pvlist_free);
	spin_init(&pmap->pm_spin, "pmapinit");
	lwkt_token_init(&pmap->pm_token, "pmap_tok");
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
	pmap->pm_stats.resident_count = 1;
}

/*
 * Clean up a pmap structure so it can be physically freed.  This routine
 * is called by the vmspace dtor function.  A great deal of pmap data is
 * left passively mapped to improve vmspace management so we have a bit
 * of cleanup work to do here.
 *
 * No requirements.
 */
void
pmap_puninit(pmap_t pmap)
{
	vm_page_t p;

	KKASSERT(CPUMASK_TESTZERO(pmap->pm_active));
	if ((p = pmap->pm_pdirm) != NULL) {
		KKASSERT(pmap->pm_pml4 != NULL);
		pmap_kremove((vm_offset_t)pmap->pm_pml4);
		vm_page_busy_wait(p, FALSE, "pgpun");
		p->wire_count--;
		atomic_add_int(&vmstats.v_wire_count, -1);
		vm_page_free_zero(p);
		pmap->pm_pdirm = NULL;
	}
	if (pmap->pm_pml4) {
		kmem_free(&kernel_map, (vm_offset_t)pmap->pm_pml4, PAGE_SIZE);
		pmap->pm_pml4 = NULL;
	}
	if (pmap->pm_pteobj) {
		vm_object_deallocate(pmap->pm_pteobj);
		pmap->pm_pteobj = NULL;
	}
}

/*
 * This function is now unused (used to add the pmap to the pmap_list)
 */
void
pmap_pinit2(struct pmap *pmap)
{
}

/*
 * Attempt to release and free a vm_page in a pmap.  Returns 1 on success,
 * 0 on failure (if the procedure had to sleep).
 *
 * When asked to remove the page directory page itself, we actually just
 * leave it cached so we do not have to incur the SMP inval overhead of
 * removing the kernel mapping.  pmap_puninit() will take care of it.
 */
static int
pmap_release_free_page(struct pmap *pmap, vm_page_t p)
{
	/*
	 * This code optimizes the case of freeing non-busy
	 * page-table pages.  Those pages are zero now, and
	 * might as well be placed directly into the zero queue.
	 */
	if (vm_page_busy_try(p, FALSE)) {
		vm_page_sleep_busy(p, FALSE, "pmaprl");
		return 0;
	}

	/*
	 * Remove the page table page from the process's address space.
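	 *
	 * The pindex encodes the level: the PML4 page itself lives at
	 * NUPDE + NUPDPE + PML4PML4I, PDP pages start at NUPDE + NUPDPE,
	 * PD pages at NUPDE, and PT pages lie below NUPDE.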
	 */
	if (p->pindex == NUPDE + NUPDPE + PML4PML4I) {
		/*
		 * We are the pml4 table itself.
		 */
		/* XXX anything to do here? */
	} else if (p->pindex >= (NUPDE + NUPDPE)) {
		/*
		 * We are a PDP page.
		 * We look for the PML4 entry that points to us.
		 */
		vm_page_t m4 = vm_page_lookup(pmap->pm_pteobj,
					      NUPDE + NUPDPE + PML4PML4I);
		KKASSERT(m4 != NULL);
		pml4_entry_t *pml4 =
			(pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m4));
		int idx = (p->pindex - (NUPDE + NUPDPE)) % NPML4EPG;
		KKASSERT(pml4[idx] != 0);
		pml4[idx] = 0;
		m4->hold_count--;
		/* JG What about wire_count? */
	} else if (p->pindex >= NUPDE) {
		/*
		 * We are a PD page.
		 * We look for the PDP entry that points to us.
		 */
		vm_page_t m3 = vm_page_lookup(pmap->pm_pteobj,
					      NUPDE + NUPDPE +
					      (p->pindex - NUPDE) / NPDPEPG);
		KKASSERT(m3 != NULL);
		pdp_entry_t *pdp =
			(pdp_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m3));
		int idx = (p->pindex - NUPDE) % NPDPEPG;
		KKASSERT(pdp[idx] != 0);
		pdp[idx] = 0;
		m3->hold_count--;
		/* JG What about wire_count? */
	} else {
		/*
		 * We are a PT page.
		 * We look for the PD entry that points to us.
		 */
		vm_page_t m2 = vm_page_lookup(pmap->pm_pteobj,
					      NUPDE + p->pindex / NPDEPG);
		KKASSERT(m2 != NULL);
		pd_entry_t *pd =
			(pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m2));
		int idx = p->pindex % NPDEPG;
		pd[idx] = 0;
		m2->hold_count--;
		/* JG What about wire_count? */
	}
	KKASSERT(pmap->pm_stats.resident_count > 0);
	--pmap->pm_stats.resident_count;

	if (p->hold_count) {
		panic("pmap_release: freeing held pt page "
		      "pmap=%p pg=%p dmap=%p pi=%ld {%ld,%ld,%ld}",
		      pmap, p, (void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(p)),
		      p->pindex, NUPDE, NUPDPE, PML4PML4I);
	}
	if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex))
		pmap->pm_ptphint = NULL;

	/*
	 * We leave the top-level page table page cached, wired, and mapped in
	 * the pmap until the dtor function (pmap_puninit()) gets called.
	 * However, still clean it up.
	 */
	if (p->pindex == NUPDE + NUPDPE + PML4PML4I) {
		bzero(pmap->pm_pml4, PAGE_SIZE);
		vm_page_wakeup(p);
	} else {
		abort();
		p->wire_count--;
		atomic_add_int(&vmstats.v_wire_count, -1);
		/* JG eventually revert to using vm_page_free_zero() */
		vm_page_free(p);
	}
	return 1;
}

/*
 * This routine is called if the page table page is not
 * mapped correctly.
 */
static vm_page_t
_pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex)
{
	vm_page_t m, pdppg, pdpg;

	/*
	 * Find or fabricate a new pagetable page.  Handle allocation
	 * races by checking m->valid.
	 */
	m = vm_page_grab(pmap->pm_pteobj, ptepindex,
			 VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY);

	KASSERT(m->queue == PQ_NONE,
		("_pmap_allocpte: %p->queue != PQ_NONE", m));

	/*
	 * Increment the hold count for the page we will be returning to
	 * the caller.
	 */
	m->hold_count++;
	vm_page_wire(m);

	/*
	 * Map the pagetable page into the process address space, if
	 * it isn't already there.
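	 *
	 * Depending on ptepindex the new page is wired into the PML4, a
	 * PDP page, or a PD page, recursing to allocate any missing
	 * intermediate levels first.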
	 */
	++pmap->pm_stats.resident_count;

	if (ptepindex >= (NUPDE + NUPDPE)) {
		pml4_entry_t *pml4;
		vm_pindex_t pml4index;

		/* Wire up a new PDP page */
		pml4index = ptepindex - (NUPDE + NUPDPE);
		pml4 = &pmap->pm_pml4[pml4index];
		*pml4 = VM_PAGE_TO_PHYS(m) |
			VPTE_RW | VPTE_V | VPTE_U |
			VPTE_A | VPTE_M;
	} else if (ptepindex >= NUPDE) {
		vm_pindex_t pml4index;
		vm_pindex_t pdpindex;
		pml4_entry_t *pml4;
		pdp_entry_t *pdp;

		/* Wire up a new PD page */
		pdpindex = ptepindex - NUPDE;
		pml4index = pdpindex >> NPML4EPGSHIFT;

		pml4 = &pmap->pm_pml4[pml4index];
		if ((*pml4 & VPTE_V) == 0) {
			/* Have to allocate a new PDP page, recurse */
			if (_pmap_allocpte(pmap, NUPDE + NUPDPE + pml4index)
			    == NULL) {
				--m->wire_count;
				vm_page_free(m);
				return (NULL);
			}
		} else {
			/* Add reference to the PDP page */
			pdppg = PHYS_TO_VM_PAGE(*pml4 & VPTE_FRAME);
			pdppg->hold_count++;
		}
		pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & VPTE_FRAME);

		/* Now find the pdp page */
		pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)];
		KKASSERT(*pdp == 0);	/* JG DEBUG64 */
		*pdp = VM_PAGE_TO_PHYS(m) | VPTE_RW | VPTE_V | VPTE_U |
		       VPTE_A | VPTE_M;
	} else {
		vm_pindex_t pml4index;
		vm_pindex_t pdpindex;
		pml4_entry_t *pml4;
		pdp_entry_t *pdp;
		pd_entry_t *pd;

		/* Wire up a new PT page */
		pdpindex = ptepindex >> NPDPEPGSHIFT;
		pml4index = pdpindex >> NPML4EPGSHIFT;

		/* First, find the pdp and check that it's valid. */
		pml4 = &pmap->pm_pml4[pml4index];
		if ((*pml4 & VPTE_V) == 0) {
			/*
			 * We miss a PDP page.  We ultimately need a PD page.
			 * Recursively allocating a PD page will allocate
			 * the missing PDP page and will also allocate
			 * the PD page we need.
			 */
			/* Have to allocate a new PD page, recurse */
			if (_pmap_allocpte(pmap, NUPDE + pdpindex)
			    == NULL) {
				--m->wire_count;
				vm_page_free(m);
				return (NULL);
			}
			pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & VPTE_FRAME);
			pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)];
		} else {
			pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & VPTE_FRAME);
			pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)];
			if ((*pdp & VPTE_V) == 0) {
				/* Have to allocate a new PD page, recurse */
				if (_pmap_allocpte(pmap, NUPDE + pdpindex)
				    == NULL) {
					--m->wire_count;
					vm_page_free(m);
					return (NULL);
				}
			} else {
				/* Add reference to the PD page */
				pdpg = PHYS_TO_VM_PAGE(*pdp & VPTE_FRAME);
				pdpg->hold_count++;
			}
		}
		pd = (pd_entry_t *)PHYS_TO_DMAP(*pdp & VPTE_FRAME);

		/* Now we know where the page directory page is */
		pd = &pd[ptepindex & ((1ul << NPDEPGSHIFT) - 1)];
		KKASSERT(*pd == 0);	/* JG DEBUG64 */
		*pd = VM_PAGE_TO_PHYS(m) | VPTE_RW | VPTE_V | VPTE_U |
		      VPTE_A | VPTE_M;
	}

	/*
	 * Set the page table hint
	 */
	pmap->pm_ptphint = m;
	vm_page_flag_set(m, PG_MAPPED);
	vm_page_wakeup(m);

	return m;
}

/*
 * Determine the page table page required to access the VA in the pmap
 * and allocate it if necessary.  Return a held vm_page_t for the page.
 *
 * Only used with user pmaps.
 */
static vm_page_t
pmap_allocpte(pmap_t pmap, vm_offset_t va)
{
	vm_pindex_t ptepindex;
	pd_entry_t *pd;
	vm_page_t m;

	ASSERT_LWKT_TOKEN_HELD(vm_object_token(pmap->pm_pteobj));

	/*
	 * Calculate pagetable page index
	 */
	ptepindex = pmap_pde_pindex(va);

	/*
	 * Get the page directory entry
	 */
	pd = pmap_pde(pmap, va);

	/*
	 * This supports switching from a 2MB page to a
	 * normal 4K page.
	 */
	if (pd != NULL && (*pd & (VPTE_PS | VPTE_V)) == (VPTE_PS | VPTE_V)) {
		panic("no promotion/demotion yet");
		*pd = 0;
		pd = NULL;
		/*cpu_invltlb();*/
		/*smp_invltlb();*/
	}

	/*
	 * If the page table page is mapped, we just increment the
	 * hold count, and activate it.
	 */
	if (pd != NULL && (*pd & VPTE_V) != 0) {
		/* YYY hint is used here on i386 */
		m = pmap_page_lookup(pmap->pm_pteobj, ptepindex);
		pmap->pm_ptphint = m;
		vm_page_hold(m);
		vm_page_wakeup(m);
		return m;
	}

	/*
	 * Here if the pte page isn't mapped, or if it has been deallocated.
	 */
	return _pmap_allocpte(pmap, ptepindex);
}


/***************************************************
 * Pmap allocation/deallocation routines.
 ***************************************************/

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 *
 * Caller must hold pmap->pm_token
 */
static int pmap_release_callback(struct vm_page *p, void *data);

void
pmap_release(struct pmap *pmap)
{
	vm_object_t object = pmap->pm_pteobj;
	struct rb_vm_page_scan_info info;

	KKASSERT(pmap != &kernel_pmap);

	lwkt_gettoken(&vm_token);
#if defined(DIAGNOSTIC)
	if (object->ref_count != 1)
		panic("pmap_release: pteobj reference count != 1");
#endif

	info.pmap = pmap;
	info.object = object;

	KASSERT(CPUMASK_TESTZERO(pmap->pm_active),
		("pmap %p still active! %016jx",
		pmap,
		(uintmax_t)CPUMASK_LOWMASK(pmap->pm_active)));

	vm_object_hold(object);
	do {
		info.error = 0;
		info.mpte = NULL;
		info.limit = object->generation;

		vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL,
					pmap_release_callback, &info);
		if (info.error == 0 && info.mpte) {
			if (!pmap_release_free_page(pmap, info.mpte))
				info.error = 1;
		}
	} while (info.error);
	vm_object_drop(object);
	lwkt_reltoken(&vm_token);
}

static int
pmap_release_callback(struct vm_page *p, void *data)
{
	struct rb_vm_page_scan_info *info = data;

	if (p->pindex == NUPDE + NUPDPE + PML4PML4I) {
		info->mpte = p;
		return(0);
	}
	if (!pmap_release_free_page(info->pmap, p)) {
		info->error = 1;
		return(-1);
	}
	if (info->object->generation != info->limit) {
		info->error = 1;
		return(-1);
	}
	return(0);
}

/*
 * Grow the number of kernel page table entries, if needed.
 *
 * No requirements.
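 *
 * kernel_vm_end tracks how far the kernel page tables have been
 * populated; the loop below extends it up to the rounded-up kend,
 * allocating page directory and page table pages from kptobj as needed.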
 */
void
pmap_growkernel(vm_offset_t kstart, vm_offset_t kend)
{
	vm_offset_t addr;
	vm_paddr_t paddr;
	vm_offset_t ptppaddr;
	vm_page_t nkpg;
	pd_entry_t *pde, newpdir;
	pdp_entry_t newpdp;

	addr = kend;

	vm_object_hold(kptobj);
	if (kernel_vm_end == 0) {
		kernel_vm_end = KvaStart;
		nkpt = 0;
		while ((*pmap_pde(&kernel_pmap, kernel_vm_end) & VPTE_V) != 0) {
			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
					~(PAGE_SIZE * NPTEPG - 1);
			nkpt++;
			if (kernel_vm_end - 1 >= kernel_map.max_offset) {
				kernel_vm_end = kernel_map.max_offset;
				break;
			}
		}
	}
	addr = roundup2(addr, PAGE_SIZE * NPTEPG);
	if (addr - 1 >= kernel_map.max_offset)
		addr = kernel_map.max_offset;
	while (kernel_vm_end < addr) {
		pde = pmap_pde(&kernel_pmap, kernel_vm_end);
		if (pde == NULL) {
			/* We need a new PDP entry */
			nkpg = vm_page_alloc(kptobj, nkpt,
					     VM_ALLOC_NORMAL |
					     VM_ALLOC_SYSTEM |
					     VM_ALLOC_INTERRUPT);
			if (nkpg == NULL) {
				panic("pmap_growkernel: no memory to "
				      "grow kernel");
			}
			paddr = VM_PAGE_TO_PHYS(nkpg);
			pmap_zero_page(paddr);
			newpdp = (pdp_entry_t)(paddr |
					       VPTE_V | VPTE_RW | VPTE_U |
					       VPTE_A | VPTE_M);
			*pmap_pdpe(&kernel_pmap, kernel_vm_end) = newpdp;
			nkpt++;
			continue; /* try again */
		}
		if ((*pde & VPTE_V) != 0) {
			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
					~(PAGE_SIZE * NPTEPG - 1);
			if (kernel_vm_end - 1 >= kernel_map.max_offset) {
				kernel_vm_end = kernel_map.max_offset;
				break;
			}
			continue;
		}

		/*
		 * This index is bogus, but out of the way
		 */
		nkpg = vm_page_alloc(kptobj, nkpt,
				     VM_ALLOC_NORMAL |
				     VM_ALLOC_SYSTEM |
				     VM_ALLOC_INTERRUPT);
		if (nkpg == NULL)
			panic("pmap_growkernel: no memory to grow kernel");

		vm_page_wire(nkpg);
		ptppaddr = VM_PAGE_TO_PHYS(nkpg);
		pmap_zero_page(ptppaddr);
		newpdir = (pd_entry_t)(ptppaddr |
				       VPTE_V | VPTE_RW | VPTE_U |
				       VPTE_A | VPTE_M);
		*pmap_pde(&kernel_pmap, kernel_vm_end) = newpdir;
		nkpt++;

		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
				~(PAGE_SIZE * NPTEPG - 1);
		if (kernel_vm_end - 1 >= kernel_map.max_offset) {
			kernel_vm_end = kernel_map.max_offset;
			break;
		}
	}
	vm_object_drop(kptobj);
}

/*
 * Add a reference to the specified pmap.
 *
 * No requirements.
 */
void
pmap_reference(pmap_t pmap)
{
	if (pmap) {
		lwkt_gettoken(&vm_token);
		++pmap->pm_count;
		lwkt_reltoken(&vm_token);
	}
}

/************************************************************************
 *			VMSPACE MANAGEMENT				*
 ************************************************************************
 *
 * The VMSPACE management we do in our virtual kernel must be reflected
 * in the real kernel.  This is accomplished by making vmspace system
 * calls to the real kernel.
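 *
 * cpu_vmspace_alloc() creates the backing vmspace in the real kernel
 * and maps the user address range through a MAP_VPAGETABLE mapping;
 * cpu_vmspace_free() destroys it again.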
 */
void
cpu_vmspace_alloc(struct vmspace *vm)
{
	int r;
	void *rp;
	vpte_t vpte;

	/*
	 * If VMM is enabled, don't do anything; we are
	 * able to use real page tables.
	 */
	if (vmm_enabled)
		return;

#define USER_SIZE	(VM_MAX_USER_ADDRESS - VM_MIN_USER_ADDRESS)

	if (vmspace_create(&vm->vm_pmap, 0, NULL) < 0)
		panic("vmspace_create() failed");

	rp = vmspace_mmap(&vm->vm_pmap, VM_MIN_USER_ADDRESS, USER_SIZE,
			  PROT_READ|PROT_WRITE,
			  MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED,
			  MemImageFd, 0);
	if (rp == MAP_FAILED)
		panic("vmspace_mmap: failed");
	vmspace_mcontrol(&vm->vm_pmap, VM_MIN_USER_ADDRESS, USER_SIZE,
			 MADV_NOSYNC, 0);
	vpte = VM_PAGE_TO_PHYS(vmspace_pmap(vm)->pm_pdirm) |
	       VPTE_RW | VPTE_V | VPTE_U;
	r = vmspace_mcontrol(&vm->vm_pmap, VM_MIN_USER_ADDRESS, USER_SIZE,
			     MADV_SETMAP, vpte);
	if (r < 0)
		panic("vmspace_mcontrol: failed");
}

void
cpu_vmspace_free(struct vmspace *vm)
{
	/*
	 * If VMM is enabled, don't do anything; we are
	 * able to use real page tables.
	 */
	if (vmm_enabled)
		return;

	if (vmspace_destroy(&vm->vm_pmap) < 0)
		panic("vmspace_destroy() failed");
}

/***************************************************
 * page management routines.
 ***************************************************/

/*
 * free the pv_entry back to the free list.  This function may be
 * called from an interrupt.
 */
static __inline void
free_pv_entry(pv_entry_t pv)
{
	pv_entry_count--;
	KKASSERT(pv_entry_count >= 0);
	zfree(pvzone, pv);
}

/*
 * get a new pv_entry, allocating a block from the system
 * when needed.  This function may be called from an interrupt.
 */
static pv_entry_t
get_pv_entry(void)
{
	pv_entry_count++;
	if (pv_entry_high_water &&
	    (pv_entry_count > pv_entry_high_water) &&
	    (pmap_pagedaemon_waken == 0)) {
		pmap_pagedaemon_waken = 1;
		wakeup(&vm_pages_needed);
	}
	return zalloc(pvzone);
}

/*
 * This routine is very drastic, but can save the system
 * in a pinch.
 *
 * No requirements.
 */
void
pmap_collect(void)
{
	int i;
	vm_page_t m;
	static int warningdone = 0;

	if (pmap_pagedaemon_waken == 0)
		return;
	lwkt_gettoken(&vm_token);
	pmap_pagedaemon_waken = 0;

	if (warningdone < 5) {
		kprintf("pmap_collect: collecting pv entries -- "
			"suggest increasing PMAP_SHPGPERPROC\n");
		warningdone++;
	}

	for (i = 0; i < vm_page_array_size; i++) {
		m = &vm_page_array[i];
		if (m->wire_count || m->hold_count)
			continue;
		if (vm_page_busy_try(m, TRUE) == 0) {
			if (m->wire_count == 0 && m->hold_count == 0) {
				pmap_remove_all(m);
			}
			vm_page_wakeup(m);
		}
	}
	lwkt_reltoken(&vm_token);
}


/*
 * If it is the first entry on the list, it is actually
 * in the header and we must copy the following entry up
 * to the header.  Otherwise we must search the list for
 * the entry.  In either case we free the now unused entry.
 *
 * Caller must hold vm_token.
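 *
 * (The two scans below pick whichever list is expected to be shorter:
 * the page's pv list or the pmap's pv list.)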
 */
static int
pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va)
{
	pv_entry_t pv;
	int rtval;

	if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
			if (pmap == pv->pv_pmap && va == pv->pv_va)
				break;
		}
	} else {
		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
			if (va == pv->pv_va)
				break;
		}
	}

	/*
	 * Note that pv_ptem is NULL if the page table page itself is not
	 * managed, even if the page being removed IS managed.
	 */
	rtval = 0;
	/* JGXXX When can 'pv' be NULL? */
	if (pv) {
		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
		m->md.pv_list_count--;
		atomic_add_int(&m->object->agg_pv_list_count, -1);
		KKASSERT(m->md.pv_list_count >= 0);
		if (TAILQ_EMPTY(&m->md.pv_list))
			vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
		++pmap->pm_generation;
		KKASSERT(pmap->pm_pteobj != NULL);
		vm_object_hold(pmap->pm_pteobj);
		rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem);
		vm_object_drop(pmap->pm_pteobj);
		free_pv_entry(pv);
	}
	return rtval;
}

/*
 * Create a pv entry for page at pa for (pmap, va).  If the page table page
 * holding the VA is managed, mpte will be non-NULL.
 */
static void
pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)
{
	pv_entry_t pv;

	crit_enter();
	pv = get_pv_entry();
	pv->pv_va = va;
	pv->pv_pmap = pmap;
	pv->pv_ptem = mpte;

	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
	m->md.pv_list_count++;
	atomic_add_int(&m->object->agg_pv_list_count, 1);

	crit_exit();
}

/*
 * pmap_remove_pte: do the things to unmap a page in a process
 */
static int
pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va)
{
	pt_entry_t oldpte;
	vm_page_t m;

	oldpte = pmap_inval_loadandclear(ptq, pmap, va);
	if (oldpte & VPTE_WIRED)
		--pmap->pm_stats.wired_count;
	KKASSERT(pmap->pm_stats.wired_count >= 0);

#if 0
	/*
	 * Machines that don't support invlpg, also don't support
	 * PG_G.  XXX PG_G is disabled for SMP so don't worry about
	 * the SMP case.
	 */
	if (oldpte & PG_G)
		cpu_invlpg((void *)va);
#endif
	KKASSERT(pmap->pm_stats.resident_count > 0);
	--pmap->pm_stats.resident_count;
	if (oldpte & VPTE_MANAGED) {
		m = PHYS_TO_VM_PAGE(oldpte);
		if (oldpte & VPTE_M) {
#if defined(PMAP_DIAGNOSTIC)
			if (pmap_nw_modified(oldpte)) {
				kprintf("pmap_remove: modified page not "
					"writable: va: 0x%lx, pte: 0x%lx\n",
					va, oldpte);
			}
#endif
			if (pmap_track_modified(pmap, va))
				vm_page_dirty(m);
		}
		if (oldpte & VPTE_A)
			vm_page_flag_set(m, PG_REFERENCED);
		return pmap_remove_entry(pmap, m, va);
	} else {
		return pmap_unuse_pt(pmap, va, NULL);
	}

	return 0;
}

/*
 * pmap_remove_page:
 *
 *	Remove a single page from a process address space.
 *
 *	This function may not be called from an interrupt if the pmap is
 *	not kernel_pmap.
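 *
 *	The PTE for the VA must exist and be valid; nonexistent or
 *	invalid PTEs are silently ignored.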
2020 */ 2021 static void 2022 pmap_remove_page(struct pmap *pmap, vm_offset_t va) 2023 { 2024 pt_entry_t *pte; 2025 2026 pte = pmap_pte(pmap, va); 2027 if (pte == NULL) 2028 return; 2029 if ((*pte & VPTE_V) == 0) 2030 return; 2031 pmap_remove_pte(pmap, pte, va); 2032 } 2033 2034 /* 2035 * Remove the given range of addresses from the specified map. 2036 * 2037 * It is assumed that the start and end are properly rounded to 2038 * the page size. 2039 * 2040 * This function may not be called from an interrupt if the pmap is 2041 * not kernel_pmap. 2042 * 2043 * No requirements. 2044 */ 2045 void 2046 pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) 2047 { 2048 vm_offset_t va_next; 2049 pml4_entry_t *pml4e; 2050 pdp_entry_t *pdpe; 2051 pd_entry_t ptpaddr, *pde; 2052 pt_entry_t *pte; 2053 2054 if (pmap == NULL) 2055 return; 2056 2057 vm_object_hold(pmap->pm_pteobj); 2058 lwkt_gettoken(&vm_token); 2059 KKASSERT(pmap->pm_stats.resident_count >= 0); 2060 if (pmap->pm_stats.resident_count == 0) { 2061 lwkt_reltoken(&vm_token); 2062 vm_object_drop(pmap->pm_pteobj); 2063 return; 2064 } 2065 2066 /* 2067 * special handling of removing one page. a very 2068 * common operation and easy to short circuit some 2069 * code. 2070 */ 2071 if (sva + PAGE_SIZE == eva) { 2072 pde = pmap_pde(pmap, sva); 2073 if (pde && (*pde & VPTE_PS) == 0) { 2074 pmap_remove_page(pmap, sva); 2075 lwkt_reltoken(&vm_token); 2076 vm_object_drop(pmap->pm_pteobj); 2077 return; 2078 } 2079 } 2080 2081 for (; sva < eva; sva = va_next) { 2082 pml4e = pmap_pml4e(pmap, sva); 2083 if ((*pml4e & VPTE_V) == 0) { 2084 va_next = (sva + NBPML4) & ~PML4MASK; 2085 if (va_next < sva) 2086 va_next = eva; 2087 continue; 2088 } 2089 2090 pdpe = pmap_pml4e_to_pdpe(pml4e, sva); 2091 if ((*pdpe & VPTE_V) == 0) { 2092 va_next = (sva + NBPDP) & ~PDPMASK; 2093 if (va_next < sva) 2094 va_next = eva; 2095 continue; 2096 } 2097 2098 /* 2099 * Calculate index for next page table. 2100 */ 2101 va_next = (sva + NBPDR) & ~PDRMASK; 2102 if (va_next < sva) 2103 va_next = eva; 2104 2105 pde = pmap_pdpe_to_pde(pdpe, sva); 2106 ptpaddr = *pde; 2107 2108 /* 2109 * Weed out invalid mappings. 2110 */ 2111 if (ptpaddr == 0) 2112 continue; 2113 2114 /* 2115 * Check for large page. 2116 */ 2117 if ((ptpaddr & VPTE_PS) != 0) { 2118 /* JG FreeBSD has more complex treatment here */ 2119 KKASSERT(*pde != 0); 2120 pmap_inval_pde(pde, pmap, sva); 2121 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 2122 continue; 2123 } 2124 2125 /* 2126 * Limit our scan to either the end of the va represented 2127 * by the current page table page, or to the end of the 2128 * range being removed. 2129 */ 2130 if (va_next > eva) 2131 va_next = eva; 2132 2133 /* 2134 * NOTE: pmap_remove_pte() can block. 2135 */ 2136 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, 2137 sva += PAGE_SIZE) { 2138 if (*pte == 0) 2139 continue; 2140 if (pmap_remove_pte(pmap, pte, sva)) 2141 break; 2142 } 2143 } 2144 lwkt_reltoken(&vm_token); 2145 vm_object_drop(pmap->pm_pteobj); 2146 } 2147 2148 /* 2149 * Removes this physical page from all physical maps in which it resides. 2150 * Reflects back modify bits to the pager. 2151 * 2152 * This routine may not be called from an interrupt. 2153 * 2154 * No requirements. 2155 */ 2156 static void 2157 pmap_remove_all(vm_page_t m) 2158 { 2159 pt_entry_t *pte, tpte; 2160 pv_entry_t pv; 2161 2162 #if defined(PMAP_DIAGNOSTIC) 2163 /* 2164 * XXX this makes pmap_page_protect(NONE) illegal for non-managed 2165 * pages! 
2166 */ 2167 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { 2168 panic("pmap_page_protect: illegal for unmanaged page, va: 0x%08llx", (long long)VM_PAGE_TO_PHYS(m)); 2169 } 2170 #endif 2171 2172 lwkt_gettoken(&vm_token); 2173 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 2174 KKASSERT(pv->pv_pmap->pm_stats.resident_count > 0); 2175 --pv->pv_pmap->pm_stats.resident_count; 2176 2177 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2178 KKASSERT(pte != NULL); 2179 2180 tpte = pmap_inval_loadandclear(pte, pv->pv_pmap, pv->pv_va); 2181 if (tpte & VPTE_WIRED) 2182 pv->pv_pmap->pm_stats.wired_count--; 2183 KKASSERT(pv->pv_pmap->pm_stats.wired_count >= 0); 2184 2185 if (tpte & VPTE_A) 2186 vm_page_flag_set(m, PG_REFERENCED); 2187 2188 /* 2189 * Update the vm_page_t clean and reference bits. 2190 */ 2191 if (tpte & VPTE_M) { 2192 #if defined(PMAP_DIAGNOSTIC) 2193 if (pmap_nw_modified(tpte)) { 2194 kprintf( 2195 "pmap_remove_all: modified page not writable: va: 0x%lx, pte: 0x%lx\n", 2196 pv->pv_va, tpte); 2197 } 2198 #endif 2199 if (pmap_track_modified(pv->pv_pmap, pv->pv_va)) 2200 vm_page_dirty(m); 2201 } 2202 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2203 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); 2204 ++pv->pv_pmap->pm_generation; 2205 m->md.pv_list_count--; 2206 atomic_add_int(&m->object->agg_pv_list_count, -1); 2207 KKASSERT(m->md.pv_list_count >= 0); 2208 if (TAILQ_EMPTY(&m->md.pv_list)) 2209 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 2210 vm_object_hold(pv->pv_pmap->pm_pteobj); 2211 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); 2212 vm_object_drop(pv->pv_pmap->pm_pteobj); 2213 free_pv_entry(pv); 2214 } 2215 KKASSERT((m->flags & (PG_MAPPED|PG_WRITEABLE)) == 0); 2216 lwkt_reltoken(&vm_token); 2217 } 2218 2219 /* 2220 * Removes the page from a particular pmap 2221 */ 2222 void 2223 pmap_remove_specific(pmap_t pmap, vm_page_t m) 2224 { 2225 pt_entry_t *pte, tpte; 2226 pv_entry_t pv; 2227 2228 lwkt_gettoken(&vm_token); 2229 again: 2230 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2231 if (pv->pv_pmap != pmap) 2232 continue; 2233 2234 KKASSERT(pv->pv_pmap->pm_stats.resident_count > 0); 2235 --pv->pv_pmap->pm_stats.resident_count; 2236 2237 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2238 KKASSERT(pte != NULL); 2239 2240 tpte = pmap_inval_loadandclear(pte, pv->pv_pmap, pv->pv_va); 2241 if (tpte & VPTE_WIRED) 2242 pv->pv_pmap->pm_stats.wired_count--; 2243 KKASSERT(pv->pv_pmap->pm_stats.wired_count >= 0); 2244 2245 if (tpte & VPTE_A) 2246 vm_page_flag_set(m, PG_REFERENCED); 2247 2248 /* 2249 * Update the vm_page_t clean and reference bits. 2250 */ 2251 if (tpte & VPTE_M) { 2252 if (pmap_track_modified(pv->pv_pmap, pv->pv_va)) 2253 vm_page_dirty(m); 2254 } 2255 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2256 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); 2257 ++pv->pv_pmap->pm_generation; 2258 m->md.pv_list_count--; 2259 atomic_add_int(&m->object->agg_pv_list_count, -1); 2260 KKASSERT(m->md.pv_list_count >= 0); 2261 if (TAILQ_EMPTY(&m->md.pv_list)) 2262 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 2263 vm_object_hold(pv->pv_pmap->pm_pteobj); 2264 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); 2265 vm_object_drop(pv->pv_pmap->pm_pteobj); 2266 free_pv_entry(pv); 2267 goto again; 2268 } 2269 lwkt_reltoken(&vm_token); 2270 } 2271 2272 /* 2273 * Set the physical protection on the specified range of this map 2274 * as requested. 2275 * 2276 * This function may not be called from an interrupt if the map is 2277 * not the kernel_pmap. 2278 * 2279 * No requirements. 
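 *
 * Typical use (illustrative sketch only) is revoking write access
 * to a range one page long:
 *
 *	pmap_protect(pmap, va, va + PAGE_SIZE, VM_PROT_READ);
 *
 * The walk below advances in page-directory-sized strides, e.g. with
 * 2MB directory pages, va_next = (sva + NBPDR) & ~PDRMASK turns
 * sva = 0x201234 into va_next = 0x400000.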
2280  */
2281 void
2282 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
2283 {
2284 	vm_offset_t va_next;
2285 	pml4_entry_t *pml4e;
2286 	pdp_entry_t *pdpe;
2287 	pd_entry_t ptpaddr, *pde;
2288 	pt_entry_t *pte;
2289 
2290 	/* JG review for NX */
2291 
2292 	if (pmap == NULL)
2293 		return;
2294 
2295 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
2296 		pmap_remove(pmap, sva, eva);
2297 		return;
2298 	}
2299 
2300 	if (prot & VM_PROT_WRITE)
2301 		return;
2302 
2303 	lwkt_gettoken(&vm_token);
2304 
2305 	for (; sva < eva; sva = va_next) {
2306 
2307 		pml4e = pmap_pml4e(pmap, sva);
2308 		if ((*pml4e & VPTE_V) == 0) {
2309 			va_next = (sva + NBPML4) & ~PML4MASK;
2310 			if (va_next < sva)
2311 				va_next = eva;
2312 			continue;
2313 		}
2314 
2315 		pdpe = pmap_pml4e_to_pdpe(pml4e, sva);
2316 		if ((*pdpe & VPTE_V) == 0) {
2317 			va_next = (sva + NBPDP) & ~PDPMASK;
2318 			if (va_next < sva)
2319 				va_next = eva;
2320 			continue;
2321 		}
2322 
2323 		va_next = (sva + NBPDR) & ~PDRMASK;
2324 		if (va_next < sva)
2325 			va_next = eva;
2326 
2327 		pde = pmap_pdpe_to_pde(pdpe, sva);
2328 		ptpaddr = *pde;
2329 
2330 		/*
2331 		 * Check for large page.
2332 		 */
2333 		if ((ptpaddr & VPTE_PS) != 0) {
2334 			/* JG correct? */
2335 			pmap_clean_pde(pde, pmap, sva);
2336 			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
2337 			continue;
2338 		}
2339 
2340 		/*
2341 		 * Weed out invalid mappings.  Note: we assume that the page
2342 		 * directory table is always allocated, and in kernel virtual.
2343 		 */
2344 		if (ptpaddr == 0)
2345 			continue;
2346 
2347 		if (va_next > eva)
2348 			va_next = eva;
2349 
2350 		for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
2351 		    sva += PAGE_SIZE) {
2352 			pt_entry_t pbits;
2353 			vm_page_t m;
2354 
2355 			/*
2356 			 * Clean managed pages and also check the accessed
2357 			 * bit.  Just remove write perms for unmanaged
2358 			 * pages.  Be careful of races: turning off write
2359 			 * access will force a fault rather than setting
2360 			 * the modified bit at an unexpected time.
2361 			 */
2362 			if (*pte & VPTE_MANAGED) {
2363 				pbits = pmap_clean_pte(pte, pmap, sva);
2364 				m = NULL;
2365 				if (pbits & VPTE_A) {
2366 					m = PHYS_TO_VM_PAGE(pbits & VPTE_FRAME);
2367 					vm_page_flag_set(m, PG_REFERENCED);
2368 					atomic_clear_long(pte, VPTE_A);
2369 				}
2370 				if (pbits & VPTE_M) {
2371 					if (pmap_track_modified(pmap, sva)) {
2372 						if (m == NULL)
2373 							m = PHYS_TO_VM_PAGE(pbits & VPTE_FRAME);
2374 						vm_page_dirty(m);
2375 					}
2376 				}
2377 			} else {
2378 				pbits = pmap_setro_pte(pte, pmap, sva);
2379 			}
2380 		}
2381 	}
2382 	lwkt_reltoken(&vm_token);
2383 }
2384 
2385 /*
2386  * Enter a managed page into a pmap.  If the page is not wired, related
2387  * pmap data can be destroyed at any time for later demand-operation.
2388  *
2389  * Insert the vm_page (m) at virtual address (v) in (pmap), with the
2390  * specified protection, and wire the mapping if requested.
2391  *
2392  * NOTE: This routine may not lazy-evaluate or lose information.  The
2393  * page must actually be inserted into the given map NOW.
2394  *
2395  * NOTE: When entering a page at a KVA address, the pmap must be the
2396  * kernel_pmap.
2397  *
2398  * No requirements.
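 *
 * Hypothetical call (illustrative sketch only): enter a wired,
 * read/write mapping for page m at va:
 *
 *	pmap_enter(pmap, va, m, VM_PROT_READ | VM_PROT_WRITE, TRUE, entry);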
2399 */ 2400 void 2401 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 2402 boolean_t wired, vm_map_entry_t entry __unused) 2403 { 2404 vm_paddr_t pa; 2405 pd_entry_t *pde; 2406 pt_entry_t *pte; 2407 vm_paddr_t opa; 2408 pt_entry_t origpte, newpte; 2409 vm_page_t mpte; 2410 2411 if (pmap == NULL) 2412 return; 2413 2414 va = trunc_page(va); 2415 2416 vm_object_hold(pmap->pm_pteobj); 2417 lwkt_gettoken(&vm_token); 2418 2419 /* 2420 * Get the page table page. The kernel_pmap's page table pages 2421 * are preallocated and have no associated vm_page_t. 2422 */ 2423 if (pmap == &kernel_pmap) 2424 mpte = NULL; 2425 else 2426 mpte = pmap_allocpte(pmap, va); 2427 2428 pde = pmap_pde(pmap, va); 2429 if (pde != NULL && (*pde & VPTE_V) != 0) { 2430 if ((*pde & VPTE_PS) != 0) 2431 panic("pmap_enter: attempted pmap_enter on 2MB page"); 2432 pte = pmap_pde_to_pte(pde, va); 2433 } else { 2434 panic("pmap_enter: invalid page directory va=%#lx", va); 2435 } 2436 2437 KKASSERT(pte != NULL); 2438 /* 2439 * Deal with races on the original mapping (though don't worry 2440 * about VPTE_A races) by cleaning it. This will force a fault 2441 * if an attempt is made to write to the page. 2442 */ 2443 pa = VM_PAGE_TO_PHYS(m); 2444 origpte = pmap_clean_pte(pte, pmap, va); 2445 opa = origpte & VPTE_FRAME; 2446 2447 if (origpte & VPTE_PS) 2448 panic("pmap_enter: attempted pmap_enter on 2MB page"); 2449 2450 /* 2451 * Mapping has not changed, must be protection or wiring change. 2452 */ 2453 if (origpte && (opa == pa)) { 2454 /* 2455 * Wiring change, just update stats. We don't worry about 2456 * wiring PT pages as they remain resident as long as there 2457 * are valid mappings in them. Hence, if a user page is wired, 2458 * the PT page will be also. 2459 */ 2460 if (wired && ((origpte & VPTE_WIRED) == 0)) 2461 ++pmap->pm_stats.wired_count; 2462 else if (!wired && (origpte & VPTE_WIRED)) 2463 --pmap->pm_stats.wired_count; 2464 2465 /* 2466 * Remove the extra pte reference. Note that we cannot 2467 * optimize the RO->RW case because we have adjusted the 2468 * wiring count above and may need to adjust the wiring 2469 * bits below. 2470 */ 2471 if (mpte) 2472 mpte->hold_count--; 2473 2474 /* 2475 * We might be turning off write access to the page, 2476 * so we go ahead and sense modify status. 2477 */ 2478 if (origpte & VPTE_MANAGED) { 2479 if ((origpte & VPTE_M) && 2480 pmap_track_modified(pmap, va)) { 2481 vm_page_t om; 2482 om = PHYS_TO_VM_PAGE(opa); 2483 vm_page_dirty(om); 2484 } 2485 pa |= VPTE_MANAGED; 2486 KKASSERT(m->flags & PG_MAPPED); 2487 } 2488 goto validate; 2489 } 2490 /* 2491 * Mapping has changed, invalidate old range and fall through to 2492 * handle validating new mapping. 2493 */ 2494 if (opa) { 2495 int err; 2496 err = pmap_remove_pte(pmap, pte, va); 2497 if (err) 2498 panic("pmap_enter: pte vanished, va: 0x%lx", va); 2499 } 2500 2501 /* 2502 * Enter on the PV list if part of our managed memory. Note that we 2503 * raise IPL while manipulating pv_table since pmap_enter can be 2504 * called at interrupt time. 2505 */ 2506 if (pmap_initialized && 2507 (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { 2508 pmap_insert_entry(pmap, va, mpte, m); 2509 pa |= VPTE_MANAGED; 2510 vm_page_flag_set(m, PG_MAPPED); 2511 } 2512 2513 /* 2514 * Increment counters 2515 */ 2516 ++pmap->pm_stats.resident_count; 2517 if (wired) 2518 pmap->pm_stats.wired_count++; 2519 2520 validate: 2521 /* 2522 * Now validate mapping with desired protection/wiring. 
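 *
 * The comparison below masks VPTE_RW, VPTE_M and VPTE_A out of the
 * (already cleaned) original pte, so that only differences in the
 * frame, protection, wiring, and user bits trigger a rewrite, while
 * a protection upgrade such as RO->RW is still detected as a change.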
2523 */ 2524 newpte = (pt_entry_t) (pa | pte_prot(pmap, prot) | VPTE_V | VPTE_U); 2525 2526 if (wired) 2527 newpte |= VPTE_WIRED; 2528 // if (pmap != &kernel_pmap) 2529 newpte |= VPTE_U; 2530 2531 /* 2532 * If the mapping or permission bits are different from the 2533 * (now cleaned) original pte, an update is needed. We've 2534 * already downgraded or invalidated the page so all we have 2535 * to do now is update the bits. 2536 * 2537 * XXX should we synchronize RO->RW changes to avoid another 2538 * fault? 2539 */ 2540 if ((origpte & ~(VPTE_RW|VPTE_M|VPTE_A)) != newpte) { 2541 *pte = newpte | VPTE_A; 2542 if (newpte & VPTE_RW) 2543 vm_page_flag_set(m, PG_WRITEABLE); 2544 } 2545 KKASSERT((newpte & VPTE_MANAGED) == 0 || (m->flags & PG_MAPPED)); 2546 lwkt_reltoken(&vm_token); 2547 vm_object_drop(pmap->pm_pteobj); 2548 } 2549 2550 /* 2551 * This code works like pmap_enter() but assumes VM_PROT_READ and not-wired. 2552 * 2553 * Currently this routine may only be used on user pmaps, not kernel_pmap. 2554 * 2555 * No requirements. 2556 */ 2557 void 2558 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m) 2559 { 2560 pt_entry_t *pte; 2561 vm_paddr_t pa; 2562 vm_page_t mpte; 2563 vm_pindex_t ptepindex; 2564 pd_entry_t *ptepa; 2565 2566 KKASSERT(pmap != &kernel_pmap); 2567 2568 KKASSERT(va >= VM_MIN_USER_ADDRESS && va < VM_MAX_USER_ADDRESS); 2569 2570 /* 2571 * Calculate pagetable page index 2572 */ 2573 ptepindex = pmap_pde_pindex(va); 2574 2575 vm_object_hold(pmap->pm_pteobj); 2576 lwkt_gettoken(&vm_token); 2577 2578 do { 2579 /* 2580 * Get the page directory entry 2581 */ 2582 ptepa = pmap_pde(pmap, va); 2583 2584 /* 2585 * If the page table page is mapped, we just increment 2586 * the hold count, and activate it. 2587 */ 2588 if (ptepa && (*ptepa & VPTE_V) != 0) { 2589 if (*ptepa & VPTE_PS) 2590 panic("pmap_enter_quick: unexpected mapping into 2MB page"); 2591 if (pmap->pm_ptphint && 2592 (pmap->pm_ptphint->pindex == ptepindex)) { 2593 mpte = pmap->pm_ptphint; 2594 } else { 2595 mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex); 2596 pmap->pm_ptphint = mpte; 2597 vm_page_wakeup(mpte); 2598 } 2599 if (mpte) 2600 mpte->hold_count++; 2601 } else { 2602 mpte = _pmap_allocpte(pmap, ptepindex); 2603 } 2604 } while (mpte == NULL); 2605 2606 /* 2607 * Ok, now that the page table page has been validated, get the pte. 2608 * If the pte is already mapped undo mpte's hold_count and 2609 * just return. 
2610 */ 2611 pte = pmap_pte(pmap, va); 2612 if (*pte & VPTE_V) { 2613 KKASSERT(mpte != NULL); 2614 pmap_unwire_pte_hold(pmap, va, mpte); 2615 pa = VM_PAGE_TO_PHYS(m); 2616 KKASSERT(((*pte ^ pa) & VPTE_FRAME) == 0); 2617 lwkt_reltoken(&vm_token); 2618 vm_object_drop(pmap->pm_pteobj); 2619 return; 2620 } 2621 2622 /* 2623 * Enter on the PV list if part of our managed memory 2624 */ 2625 if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { 2626 pmap_insert_entry(pmap, va, mpte, m); 2627 vm_page_flag_set(m, PG_MAPPED); 2628 } 2629 2630 /* 2631 * Increment counters 2632 */ 2633 ++pmap->pm_stats.resident_count; 2634 2635 pa = VM_PAGE_TO_PHYS(m); 2636 2637 /* 2638 * Now validate mapping with RO protection 2639 */ 2640 if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) 2641 *pte = (vpte_t)pa | VPTE_V | VPTE_U; 2642 else 2643 *pte = (vpte_t)pa | VPTE_V | VPTE_U | VPTE_MANAGED; 2644 /*pmap_inval_add(&info, pmap, va); shouldn't be needed 0->valid */ 2645 /*pmap_inval_flush(&info); don't need for vkernel */ 2646 lwkt_reltoken(&vm_token); 2647 vm_object_drop(pmap->pm_pteobj); 2648 } 2649 2650 /* 2651 * Make a temporary mapping for a physical address. This is only intended 2652 * to be used for panic dumps. 2653 * 2654 * The caller is responsible for calling smp_invltlb(). 2655 */ 2656 void * 2657 pmap_kenter_temporary(vm_paddr_t pa, long i) 2658 { 2659 pmap_kenter_quick(crashdumpmap + (i * PAGE_SIZE), pa); 2660 return ((void *)crashdumpmap); 2661 } 2662 2663 #define MAX_INIT_PT (96) 2664 2665 /* 2666 * This routine preloads the ptes for a given object into the specified pmap. 2667 * This eliminates the blast of soft faults on process startup and 2668 * immediately after an mmap. 2669 * 2670 * No requirements. 2671 */ 2672 static int pmap_object_init_pt_callback(vm_page_t p, void *data); 2673 2674 void 2675 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot, 2676 vm_object_t object, vm_pindex_t pindex, 2677 vm_size_t size, int limit) 2678 { 2679 struct rb_vm_page_scan_info info; 2680 struct lwp *lp; 2681 vm_size_t psize; 2682 2683 /* 2684 * We can't preinit if read access isn't set or there is no pmap 2685 * or object. 2686 */ 2687 if ((prot & VM_PROT_READ) == 0 || pmap == NULL || object == NULL) 2688 return; 2689 2690 /* 2691 * We can't preinit if the pmap is not the current pmap 2692 */ 2693 lp = curthread->td_lwp; 2694 if (lp == NULL || pmap != vmspace_pmap(lp->lwp_vmspace)) 2695 return; 2696 2697 psize = x86_64_btop(size); 2698 2699 if ((object->type != OBJT_VNODE) || 2700 ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) && 2701 (object->resident_page_count > MAX_INIT_PT))) { 2702 return; 2703 } 2704 2705 if (psize + pindex > object->size) { 2706 if (object->size < pindex) 2707 return; 2708 psize = object->size - pindex; 2709 } 2710 2711 if (psize == 0) 2712 return; 2713 2714 /* 2715 * Use a red-black scan to traverse the requested range and load 2716 * any valid pages found into the pmap. 2717 * 2718 * We cannot safely scan the object's memq unless we are in a 2719 * critical section since interrupts can remove pages from objects. 
2720 */ 2721 info.start_pindex = pindex; 2722 info.end_pindex = pindex + psize - 1; 2723 info.limit = limit; 2724 info.mpte = NULL; 2725 info.addr = addr; 2726 info.pmap = pmap; 2727 2728 vm_object_hold_shared(object); 2729 vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp, 2730 pmap_object_init_pt_callback, &info); 2731 vm_object_drop(object); 2732 } 2733 2734 static 2735 int 2736 pmap_object_init_pt_callback(vm_page_t p, void *data) 2737 { 2738 struct rb_vm_page_scan_info *info = data; 2739 vm_pindex_t rel_index; 2740 /* 2741 * don't allow an madvise to blow away our really 2742 * free pages allocating pv entries. 2743 */ 2744 if ((info->limit & MAP_PREFAULT_MADVISE) && 2745 vmstats.v_free_count < vmstats.v_free_reserved) { 2746 return(-1); 2747 } 2748 2749 /* 2750 * Ignore list markers and ignore pages we cannot instantly 2751 * busy (while holding the object token). 2752 */ 2753 if (p->flags & PG_MARKER) 2754 return 0; 2755 if (vm_page_busy_try(p, TRUE)) 2756 return 0; 2757 if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && 2758 (p->flags & PG_FICTITIOUS) == 0) { 2759 if ((p->queue - p->pc) == PQ_CACHE) 2760 vm_page_deactivate(p); 2761 rel_index = p->pindex - info->start_pindex; 2762 pmap_enter_quick(info->pmap, 2763 info->addr + x86_64_ptob(rel_index), p); 2764 } 2765 vm_page_wakeup(p); 2766 return(0); 2767 } 2768 2769 /* 2770 * Return TRUE if the pmap is in shape to trivially 2771 * pre-fault the specified address. 2772 * 2773 * Returns FALSE if it would be non-trivial or if a 2774 * pte is already loaded into the slot. 2775 * 2776 * No requirements. 2777 */ 2778 int 2779 pmap_prefault_ok(pmap_t pmap, vm_offset_t addr) 2780 { 2781 pt_entry_t *pte; 2782 pd_entry_t *pde; 2783 int ret; 2784 2785 lwkt_gettoken(&vm_token); 2786 pde = pmap_pde(pmap, addr); 2787 if (pde == NULL || *pde == 0) { 2788 ret = 0; 2789 } else { 2790 pte = pmap_pde_to_pte(pde, addr); 2791 ret = (*pte) ? 0 : 1; 2792 } 2793 lwkt_reltoken(&vm_token); 2794 return (ret); 2795 } 2796 2797 /* 2798 * Change the wiring attribute for a map/virtual-address pair. 2799 * 2800 * The mapping must already exist in the pmap. 2801 * No other requirements. 2802 */ 2803 void 2804 pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired, 2805 vm_map_entry_t entry __unused) 2806 { 2807 pt_entry_t *pte; 2808 2809 if (pmap == NULL) 2810 return; 2811 2812 lwkt_gettoken(&vm_token); 2813 pte = pmap_pte(pmap, va); 2814 2815 if (wired && !pmap_pte_w(pte)) 2816 pmap->pm_stats.wired_count++; 2817 else if (!wired && pmap_pte_w(pte)) 2818 pmap->pm_stats.wired_count--; 2819 2820 /* 2821 * Wiring is not a hardware characteristic so there is no need to 2822 * invalidate TLB. However, in an SMP environment we must use 2823 * a locked bus cycle to update the pte (if we are not using 2824 * the pmap_inval_*() API that is)... it's ok to do this for simple 2825 * wiring changes. 2826 */ 2827 if (wired) 2828 atomic_set_long(pte, VPTE_WIRED); 2829 else 2830 atomic_clear_long(pte, VPTE_WIRED); 2831 lwkt_reltoken(&vm_token); 2832 } 2833 2834 /* 2835 * Copy the range specified by src_addr/len 2836 * from the source map to the range dst_addr/len 2837 * in the destination map. 2838 * 2839 * This routine is only advisory and need not do anything. 2840 */ 2841 void 2842 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, 2843 vm_size_t len, vm_offset_t src_addr) 2844 { 2845 /* 2846 * XXX BUGGY. 
Among other things, srcmpte is assumed to remain
2847  * valid through blocking calls, and that's just not going to
2848  * be the case.
2849  *
2850  * FIXME!
2851  */
2852 	return;
2853 }
2854 
2855 /*
2856  * pmap_zero_page:
2857  *
2858  *	Zero the specified physical page.
2859  *
2860  *	This function may be called from an interrupt and no locking is
2861  *	required.
2862  */
2863 void
2864 pmap_zero_page(vm_paddr_t phys)
2865 {
2866 	vm_offset_t va = PHYS_TO_DMAP(phys);
2867 
2868 	bzero((void *)va, PAGE_SIZE);
2869 }
2870 
2871 /*
2872  * pmap_zero_page_area:
2873  *
2874  *	Zero part of a physical page by mapping it into memory and clearing
2875  *	its contents with bzero.
2876  *
2877  *	off and size may not cover an area beyond a single hardware page.
2878  */
2879 void
2880 pmap_zero_page_area(vm_paddr_t phys, int off, int size)
2881 {
2882 	vm_offset_t virt = PHYS_TO_DMAP(phys);
2883 
2884 	crit_enter();
2885 	bzero((char *)virt + off, size);
2886 	crit_exit();
2887 }
2888 
2889 /*
2890  * pmap_copy_page:
2891  *
2892  *	Copy the physical page from the source PA to the target PA.
2893  *	This function may be called from an interrupt.  No locking
2894  *	is required.
2895  */
2896 void
2897 pmap_copy_page(vm_paddr_t src, vm_paddr_t dst)
2898 {
2899 	vm_offset_t src_virt, dst_virt;
2900 
2901 	crit_enter();
2902 	src_virt = PHYS_TO_DMAP(src);
2903 	dst_virt = PHYS_TO_DMAP(dst);
2904 	bcopy((void *)src_virt, (void *)dst_virt, PAGE_SIZE);
2905 	crit_exit();
2906 }
2907 
2908 /*
2909  * pmap_copy_page_frag:
2910  *
2911  *	Copy a fragment of the physical page from the source PA to the
2912  *	target PA.  This function may be called from an interrupt.  No
2913  *	locking is required.
2914  */
2915 void
2916 pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes)
2917 {
2918 	vm_offset_t src_virt, dst_virt;
2919 
2920 	crit_enter();
2921 	src_virt = PHYS_TO_DMAP(src);
2922 	dst_virt = PHYS_TO_DMAP(dst);
2923 	bcopy((char *)src_virt + (src & PAGE_MASK),
2924 	      (char *)dst_virt + (dst & PAGE_MASK),
2925 	      bytes);
2926 	crit_exit();
2927 }
2928 
2929 /*
2930  * Returns true if the pmap's pv is one of the first 16 pvs linked to
2931  * from this page.  This count may be changed upwards or downwards
2932  * in the future; it is only necessary that true be returned for a small
2933  * subset of pmaps for proper page aging.
2934  *
2935  * No other requirements.
2936  */
2937 boolean_t
2938 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2939 {
2940 	pv_entry_t pv;
2941 	int loops = 0;
2942 
2943 	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2944 		return FALSE;
2945 
2946 	crit_enter();
2947 	lwkt_gettoken(&vm_token);
2948 
2949 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2950 		if (pv->pv_pmap == pmap) {
2951 			lwkt_reltoken(&vm_token);
2952 			crit_exit();
2953 			return TRUE;
2954 		}
2955 		loops++;
2956 		if (loops >= 16)
2957 			break;
2958 	}
2959 	lwkt_reltoken(&vm_token);
2960 	crit_exit();
2961 	return (FALSE);
2962 }
2963 
2964 /*
2965  * Remove all pages from the specified address space; this aids process
2966  * exit speeds.  Also, this code is special-cased for the current process
2967  * only, but can have the more generic (and slightly slower) mode enabled.
2968  * This is much faster than pmap_remove in the case of running down an
2969  * entire address space.
2970  *
2971  * No other requirements.
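 *
 * Typical use at process exit (illustrative sketch only):
 *
 *	pmap_remove_pages(pmap, VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);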
2971 */ 2972 void 2973 pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 2974 { 2975 pt_entry_t *pte, tpte; 2976 pv_entry_t pv, npv; 2977 vm_page_t m; 2978 int save_generation; 2979 2980 if (pmap->pm_pteobj) 2981 vm_object_hold(pmap->pm_pteobj); 2982 lwkt_gettoken(&vm_token); 2983 2984 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { 2985 if (pv->pv_va >= eva || pv->pv_va < sva) { 2986 npv = TAILQ_NEXT(pv, pv_plist); 2987 continue; 2988 } 2989 2990 KKASSERT(pmap == pv->pv_pmap); 2991 2992 pte = pmap_pte(pmap, pv->pv_va); 2993 2994 /* 2995 * We cannot remove wired pages from a process' mapping 2996 * at this time 2997 */ 2998 if (*pte & VPTE_WIRED) { 2999 npv = TAILQ_NEXT(pv, pv_plist); 3000 continue; 3001 } 3002 tpte = pmap_inval_loadandclear(pte, pmap, pv->pv_va); 3003 3004 m = PHYS_TO_VM_PAGE(tpte & VPTE_FRAME); 3005 3006 KASSERT(m < &vm_page_array[vm_page_array_size], 3007 ("pmap_remove_pages: bad tpte %lx", tpte)); 3008 3009 KKASSERT(pmap->pm_stats.resident_count > 0); 3010 --pmap->pm_stats.resident_count; 3011 3012 /* 3013 * Update the vm_page_t clean and reference bits. 3014 */ 3015 if (tpte & VPTE_M) { 3016 vm_page_dirty(m); 3017 } 3018 3019 npv = TAILQ_NEXT(pv, pv_plist); 3020 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 3021 save_generation = ++pmap->pm_generation; 3022 3023 m->md.pv_list_count--; 3024 atomic_add_int(&m->object->agg_pv_list_count, -1); 3025 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 3026 if (TAILQ_EMPTY(&m->md.pv_list)) 3027 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 3028 3029 pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem); 3030 free_pv_entry(pv); 3031 3032 /* 3033 * Restart the scan if we blocked during the unuse or free 3034 * calls and other removals were made. 3035 */ 3036 if (save_generation != pmap->pm_generation) { 3037 kprintf("Warning: pmap_remove_pages race-A avoided\n"); 3038 npv = TAILQ_FIRST(&pmap->pm_pvlist); 3039 } 3040 } 3041 lwkt_reltoken(&vm_token); 3042 if (pmap->pm_pteobj) 3043 vm_object_drop(pmap->pm_pteobj); 3044 } 3045 3046 /* 3047 * pmap_testbit tests bits in active mappings of a VM page. 3048 */ 3049 static boolean_t 3050 pmap_testbit(vm_page_t m, int bit) 3051 { 3052 pv_entry_t pv; 3053 pt_entry_t *pte; 3054 3055 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 3056 return FALSE; 3057 3058 if (TAILQ_FIRST(&m->md.pv_list) == NULL) 3059 return FALSE; 3060 3061 crit_enter(); 3062 3063 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3064 /* 3065 * if the bit being tested is the modified bit, then 3066 * mark clean_map and ptes as never 3067 * modified. 3068 */ 3069 if (bit & (VPTE_A|VPTE_M)) { 3070 if (!pmap_track_modified(pv->pv_pmap, pv->pv_va)) 3071 continue; 3072 } 3073 3074 #if defined(PMAP_DIAGNOSTIC) 3075 if (pv->pv_pmap == NULL) { 3076 kprintf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va); 3077 continue; 3078 } 3079 #endif 3080 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 3081 if (*pte & bit) { 3082 crit_exit(); 3083 return TRUE; 3084 } 3085 } 3086 crit_exit(); 3087 return (FALSE); 3088 } 3089 3090 /* 3091 * This routine is used to clear bits in ptes. Certain bits require special 3092 * handling, in particular (on virtual kernels) the VPTE_M (modify) bit. 3093 * 3094 * This routine is only called with certain VPTE_* bit combinations. 
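 *
 * The combinations actually used by the callers below are, for example:
 *
 *	pmap_clearbit(m, VPTE_RW)	- pmap_page_protect() write removal
 *	pmap_clearbit(m, VPTE_M)	- pmap_clear_modify()
 *	pmap_clearbit(m, VPTE_A)	- pmap_clear_reference()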
3095  */
3096 static __inline void
3097 pmap_clearbit(vm_page_t m, int bit)
3098 {
3099 	pv_entry_t pv;
3100 	pt_entry_t *pte;
3101 	pt_entry_t pbits;
3102 
3103 	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
3104 		return;
3105 
3106 	crit_enter();
3107 
3108 	/*
3109 	 * Loop over all current mappings, setting/clearing as appropriate.
3110 	 * If setting RO, do we need to clear the VAC?
3111 	 */
3112 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
3113 		/*
3114 		 * Don't write-protect pager mappings.
3115 		 */
3116 		if (bit == VPTE_RW) {
3117 			if (!pmap_track_modified(pv->pv_pmap, pv->pv_va))
3118 				continue;
3119 		}
3120 
3121 #if defined(PMAP_DIAGNOSTIC)
3122 		if (pv->pv_pmap == NULL) {
3123 			kprintf("Null pmap (cb) at va: 0x%lx\n", pv->pv_va);
3124 			continue;
3125 		}
3126 #endif
3127 
3128 		/*
3129 		 * Careful here.  We can use a locked bus instruction to
3130 		 * clear VPTE_A or VPTE_M safely but we need to synchronize
3131 		 * with the target cpus when we mess with VPTE_RW.
3132 		 *
3133 		 * On virtual kernels we must force a new fault-on-write
3134 		 * in the real kernel if we clear the Modify bit ourselves,
3135 		 * otherwise the real kernel will not get a new fault and
3136 		 * will never set our Modify bit again.
3137 		 */
3138 		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
3139 		if (*pte & bit) {
3140 			if (bit == VPTE_RW) {
3141 				/*
3142 				 * We must also clear VPTE_M when clearing
3143 				 * VPTE_RW.
3144 				 */
3145 				pbits = pmap_clean_pte(pte, pv->pv_pmap,
3146 						       pv->pv_va);
3147 				if (pbits & VPTE_M)
3148 					vm_page_dirty(m);
3149 			} else if (bit == VPTE_M) {
3150 				/*
3151 				 * We do not have to make the page read-only
3152 				 * when clearing the Modify bit.  The real
3153 				 * kernel will make the real PTE read-only
3154 				 * or otherwise detect the write and set
3155 				 * our VPTE_M again simply by us invalidating
3156 				 * the real kernel VA for the pmap (as we did
3157 				 * above).  This allows the real kernel to
3158 				 * handle the write fault without forwarding
3159 				 * the fault to us.
3160 				 */
3161 				atomic_clear_long(pte, VPTE_M);
3162 			} else if ((bit & (VPTE_RW|VPTE_M)) == (VPTE_RW|VPTE_M)) {
3163 				/*
3164 				 * We've been asked to clear W & M; the
3165 				 * caller evidently does not want us to
3166 				 * update the dirty status of the VM page.
3167 				 */
3168 				pmap_clean_pte(pte, pv->pv_pmap, pv->pv_va);
3169 			} else {
3170 				/*
3171 				 * We've been asked to clear bits that do
3172 				 * not interact with hardware.
3173 				 */
3174 				atomic_clear_long(pte, bit);
3175 			}
3176 		}
3177 	}
3178 	crit_exit();
3179 }
3180 
3181 /*
3182  * Lower the permission for all mappings to a given page.
3183  *
3184  * No other requirements.
3185  */
3186 void
3187 pmap_page_protect(vm_page_t m, vm_prot_t prot)
3188 {
3189 	/* JG NX support? */
3190 	if ((prot & VM_PROT_WRITE) == 0) {
3191 		lwkt_gettoken(&vm_token);
3192 		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
3193 			pmap_clearbit(m, VPTE_RW);
3194 			vm_page_flag_clear(m, PG_WRITEABLE);
3195 		} else {
3196 			pmap_remove_all(m);
3197 		}
3198 		lwkt_reltoken(&vm_token);
3199 	}
3200 }
3201 
3202 vm_paddr_t
3203 pmap_phys_address(vm_pindex_t ppn)
3204 {
3205 	return (x86_64_ptob(ppn));
3206 }
3207 
3208 /*
3209  * Return a count of reference bits for a page, clearing those bits.
3210  * It is not necessary for every reference bit to be cleared, but it
3211  * is necessary that 0 only be returned when there are truly no
3212  * reference bits set.
3213  *
3214  * XXX: The exact number of bits to check and clear is a matter that
3215  * should be tested and standardized at some point in the future for
3216  * optimal aging of shared pages.
3217  *
3218  * No other requirements.
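 *
 * As written, the loop below clears at most five reference bits per
 * call (it stops once rtval exceeds 4); callers should treat the
 * return value purely as an aging heuristic.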
3219  */
3220 int
3221 pmap_ts_referenced(vm_page_t m)
3222 {
3223 	pv_entry_t pv, pvf, pvn;
3224 	pt_entry_t *pte;
3225 	int rtval = 0;
3226 
3227 	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
3228 		return (rtval);
3229 
3230 	crit_enter();
3231 	lwkt_gettoken(&vm_token);
3232 
3233 	if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
3234 
3235 		pvf = pv;
3236 
3237 		do {
3238 			pvn = TAILQ_NEXT(pv, pv_list);
3239 
3240 			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
3241 
3242 			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
3243 
3244 			if (!pmap_track_modified(pv->pv_pmap, pv->pv_va))
3245 				continue;
3246 
3247 			pte = pmap_pte(pv->pv_pmap, pv->pv_va);
3248 
3249 			if (pte && (*pte & VPTE_A)) {
3250 				atomic_clear_long(pte, VPTE_A);
3251 				rtval++;
3252 				if (rtval > 4) {
3253 					break;
3254 				}
3255 			}
3256 		} while ((pv = pvn) != NULL && pv != pvf);
3257 	}
3258 	lwkt_reltoken(&vm_token);
3259 	crit_exit();
3260 
3261 	return (rtval);
3262 }
3263 
3264 /*
3265  * Return whether or not the specified physical page was modified
3266  * in any physical maps.
3267  *
3268  * No other requirements.
3269  */
3270 boolean_t
3271 pmap_is_modified(vm_page_t m)
3272 {
3273 	boolean_t res;
3274 
3275 	lwkt_gettoken(&vm_token);
3276 	res = pmap_testbit(m, VPTE_M);
3277 	lwkt_reltoken(&vm_token);
3278 	return (res);
3279 }
3280 
3281 /*
3282  * Clear the modify bits on the specified physical page.
3283  *
3284  * No other requirements.
3285  */
3286 void
3287 pmap_clear_modify(vm_page_t m)
3288 {
3289 	lwkt_gettoken(&vm_token);
3290 	pmap_clearbit(m, VPTE_M);
3291 	lwkt_reltoken(&vm_token);
3292 }
3293 
3294 /*
3295  * Clear the reference bit on the specified physical page.
3296  *
3297  * No other requirements.
3298  */
3299 void
3300 pmap_clear_reference(vm_page_t m)
3301 {
3302 	lwkt_gettoken(&vm_token);
3303 	pmap_clearbit(m, VPTE_A);
3304 	lwkt_reltoken(&vm_token);
3305 }
3306 
3307 /*
3308  * Miscellaneous support routines follow.
3309  */
3310 
3311 static void
3312 i386_protection_init(void)
3313 {
3314 	int *kp, prot;
3315 
3316 	kp = protection_codes;
3317 	for (prot = 0; prot < 8; prot++) {
3318 		if (prot & VM_PROT_READ)
3319 			*kp |= 0;	/* if it's VALID it is readable */
3320 		if (prot & VM_PROT_WRITE)
3321 			*kp |= VPTE_RW;
3322 		if (prot & VM_PROT_EXECUTE)
3323 			*kp |= 0;	/* if it's VALID it is executable */
3324 		++kp;
3325 	}
3326 }
3327 
3328 /*
3329  * Sets the memory attribute for the specified page.
3330  */
3331 void
3332 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
3333 {
3334 	/* This is a vkernel, do nothing */
3335 }
3336 
3337 /*
3338  * Change the PAT attribute on an existing kernel memory map.  Caller
3339  * must ensure that the virtual memory in question is not accessed
3340  * during the adjustment.
3341  */
3342 void
3343 pmap_change_attr(vm_offset_t va, vm_size_t count, int mode)
3344 {
3345 	/* This is a vkernel, do nothing */
3346 }
3347 
3348 /*
3349  * Perform the pmap work for mincore.
3350  *
3351  * No other requirements.
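 *
 * Hypothetical caller (illustrative sketch only), testing residency:
 *
 *	int val = pmap_mincore(pmap, addr);
 *	if (val & MINCORE_INCORE)
 *		...			(the page is resident)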
3352  */
3353 int
3354 pmap_mincore(pmap_t pmap, vm_offset_t addr)
3355 {
3356 	pt_entry_t *ptep, pte;
3357 	vm_page_t m;
3358 	int val = 0;
3359 
3360 	lwkt_gettoken(&vm_token);
3361 	ptep = pmap_pte(pmap, addr);
3362 
3363 	if (ptep && (pte = *ptep) != 0) {
3364 		vm_paddr_t pa;
3365 
3366 		val = MINCORE_INCORE;
3367 		if ((pte & VPTE_MANAGED) == 0)
3368 			goto done;
3369 
3370 		pa = pte & VPTE_FRAME;
3371 
3372 		m = PHYS_TO_VM_PAGE(pa);
3373 
3374 		/*
3375 		 * Modified by us.
3376 		 */
3377 		if (pte & VPTE_M)
3378 			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
3379 		/*
3380 		 * Modified by someone else.
3381 		 */
3382 		else if (m->dirty || pmap_is_modified(m))
3383 			val |= MINCORE_MODIFIED_OTHER;
3384 		/*
3385 		 * Referenced by us.
3386 		 */
3387 		if (pte & VPTE_A)
3388 			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
3389 
3390 		/*
3391 		 * Referenced by someone else.
3392 		 */
3393 		else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) {
3394 			val |= MINCORE_REFERENCED_OTHER;
3395 			vm_page_flag_set(m, PG_REFERENCED);
3396 		}
3397 	}
3398 done:
3399 	lwkt_reltoken(&vm_token);
3400 	return val;
3401 }
3402 
3403 /*
3404  * Replace p->p_vmspace with a new one.  If adjrefs is non-zero the new
3405  * vmspace will be ref'd and the old one will be deref'd.
3406  *
3407  * The caller must hold vmspace->vm_map.token for oldvm and newvm.
3408  */
3409 void
3410 pmap_replacevm(struct proc *p, struct vmspace *newvm, int adjrefs)
3411 {
3412 	struct vmspace *oldvm;
3413 	struct lwp *lp;
3414 
3415 	crit_enter();
3416 	oldvm = p->p_vmspace;
3417 	if (oldvm != newvm) {
3418 		if (adjrefs)
3419 			vmspace_ref(newvm);
3420 		p->p_vmspace = newvm;
3421 		KKASSERT(p->p_nthreads == 1);
3422 		lp = RB_ROOT(&p->p_lwp_tree);
3423 		pmap_setlwpvm(lp, newvm);
3424 		if (adjrefs)
3425 			vmspace_rel(oldvm);
3426 	}
3427 	crit_exit();
3428 }
3429 
3430 /*
3431  * Set the vmspace for a LWP.  The vmspace is almost universally set the
3432  * same as the process vmspace, but virtual kernels need to swap out contexts
3433  * on a per-lwp basis.
3434  */
3435 void
3436 pmap_setlwpvm(struct lwp *lp, struct vmspace *newvm)
3437 {
3438 	struct vmspace *oldvm;
3439 	struct pmap *pmap;
3440 
3441 	oldvm = lp->lwp_vmspace;
3442 	if (oldvm != newvm) {
3443 		crit_enter();
3444 		lp->lwp_vmspace = newvm;
3445 		if (curthread->td_lwp == lp) {
3446 			pmap = vmspace_pmap(newvm);
3447 			ATOMIC_CPUMASK_ORBIT(pmap->pm_active, mycpu->gd_cpuid);
3448 			if (pmap->pm_active_lock & CPULOCK_EXCL)
3449 				pmap_interlock_wait(newvm);
3450 #if defined(SWTCH_OPTIM_STATS)
3451 			tlb_flush_count++;
3452 #endif
3453 			pmap = vmspace_pmap(oldvm);
3454 			ATOMIC_CPUMASK_NANDBIT(pmap->pm_active,
3455 					       mycpu->gd_cpuid);
3456 		}
3457 		crit_exit();
3458 	}
3459 }
3460 
3461 /*
3462  * The swtch code tried to switch in a heavyweight process whose pmap
3463  * is locked by another cpu.  We have to wait for the lock to clear
3464  * before the pmap can be used.
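 *
 * Because the vkernel runs as a user process, the wait loop below spins
 * with cpu_ccfence() and yields the hosting pthread rather than halting
 * a real cpu.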
3465 */ 3466 void 3467 pmap_interlock_wait (struct vmspace *vm) 3468 { 3469 pmap_t pmap = vmspace_pmap(vm); 3470 3471 if (pmap->pm_active_lock & CPULOCK_EXCL) { 3472 crit_enter(); 3473 while (pmap->pm_active_lock & CPULOCK_EXCL) { 3474 cpu_ccfence(); 3475 pthread_yield(); 3476 } 3477 crit_exit(); 3478 } 3479 } 3480 3481 vm_offset_t 3482 pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) 3483 { 3484 3485 if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { 3486 return addr; 3487 } 3488 3489 addr = roundup2(addr, NBPDR); 3490 return addr; 3491 } 3492 3493 /* 3494 * Used by kmalloc/kfree, page already exists at va 3495 */ 3496 vm_page_t 3497 pmap_kvtom(vm_offset_t va) 3498 { 3499 vpte_t *ptep; 3500 3501 KKASSERT(va >= KvaStart && va < KvaEnd); 3502 ptep = vtopte(va); 3503 return(PHYS_TO_VM_PAGE(*ptep & PG_FRAME)); 3504 } 3505 3506 void 3507 pmap_object_init(vm_object_t object) 3508 { 3509 /* empty */ 3510 } 3511 3512 void 3513 pmap_object_free(vm_object_t object) 3514 { 3515 /* empty */ 3516 } 3517 3518 void 3519 pmap_pgscan(struct pmap_pgscan_info *pginfo) 3520 { 3521 pmap_t pmap = pginfo->pmap; 3522 vm_offset_t sva = pginfo->beg_addr; 3523 vm_offset_t eva = pginfo->end_addr; 3524 vm_offset_t va_next; 3525 pml4_entry_t *pml4e; 3526 pdp_entry_t *pdpe; 3527 pd_entry_t ptpaddr, *pde; 3528 pt_entry_t *pte; 3529 int stop = 0; 3530 3531 lwkt_gettoken(&vm_token); 3532 3533 for (; sva < eva; sva = va_next) { 3534 if (stop) 3535 break; 3536 3537 pml4e = pmap_pml4e(pmap, sva); 3538 if ((*pml4e & VPTE_V) == 0) { 3539 va_next = (sva + NBPML4) & ~PML4MASK; 3540 if (va_next < sva) 3541 va_next = eva; 3542 continue; 3543 } 3544 3545 pdpe = pmap_pml4e_to_pdpe(pml4e, sva); 3546 if ((*pdpe & VPTE_V) == 0) { 3547 va_next = (sva + NBPDP) & ~PDPMASK; 3548 if (va_next < sva) 3549 va_next = eva; 3550 continue; 3551 } 3552 3553 va_next = (sva + NBPDR) & ~PDRMASK; 3554 if (va_next < sva) 3555 va_next = eva; 3556 3557 pde = pmap_pdpe_to_pde(pdpe, sva); 3558 ptpaddr = *pde; 3559 3560 /* 3561 * Check for large page (ignore). 3562 */ 3563 if ((ptpaddr & VPTE_PS) != 0) { 3564 #if 0 3565 pmap_clean_pde(pde, pmap, sva); 3566 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 3567 #endif 3568 continue; 3569 } 3570 3571 /* 3572 * Weed out invalid mappings. Note: we assume that the page 3573 * directory table is always allocated, and in kernel virtual. 3574 */ 3575 if (ptpaddr == 0) 3576 continue; 3577 3578 if (va_next > eva) 3579 va_next = eva; 3580 3581 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, 3582 sva += PAGE_SIZE) { 3583 vm_page_t m; 3584 3585 if (stop) 3586 break; 3587 if ((*pte & VPTE_MANAGED) == 0) 3588 continue; 3589 3590 m = PHYS_TO_VM_PAGE(*pte & VPTE_FRAME); 3591 if (vm_page_busy_try(m, TRUE) == 0) { 3592 if (pginfo->callback(pginfo, sva, m) < 0) 3593 stop = 1; 3594 } 3595 } 3596 } 3597 lwkt_reltoken(&vm_token); 3598 } 3599
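
/*
 * Example consumer of pmap_pgscan() (illustrative sketch only, not
 * compiled): the callback name, its policy, and the helper below are
 * invented for illustration.  It demonstrates the calling convention
 * used above: pmap_pgscan() busies each managed page and invokes the
 * callback, which returns a negative value to abort the scan.
 */
#if 0
static int
example_pgscan_callback(struct pmap_pgscan_info *pginfo, vm_offset_t va,
			vm_page_t m)
{
	/*
	 * Assumption: the callback is responsible for releasing the
	 * busied page.
	 */
	vm_page_wakeup(m);
	return 0;		/* return < 0 to stop the scan early */
}

static void
example_scan_user_pages(pmap_t pmap)
{
	struct pmap_pgscan_info info;

	bzero(&info, sizeof(info));
	info.pmap = pmap;
	info.beg_addr = VM_MIN_USER_ADDRESS;
	info.end_addr = VM_MAX_USER_ADDRESS;
	info.callback = example_pgscan_callback;
	pmap_pgscan(&info);
}
#endif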