1 /* 2 * (MPSAFE) 3 * 4 * Copyright (c) 1991 Regents of the University of California. 5 * Copyright (c) 1994 John S. Dyson 6 * Copyright (c) 1994 David Greenman 7 * Copyright (c) 2003 Peter Wemm 8 * Copyright (c) 2005-2008 Alan L. Cox <alc@cs.rice.edu> 9 * Copyright (c) 2008, 2009 The DragonFly Project. 10 * Copyright (c) 2008, 2009 Jordan Gordeev. 11 * All rights reserved. 12 * 13 * This code is derived from software contributed to Berkeley by 14 * the Systems Programming Group of the University of Utah Computer 15 * Science Department and William Jolitz of UUNET Technologies Inc. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions 19 * are met: 20 * 1. Redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer. 22 * 2. Redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution. 25 * 3. All advertising materials mentioning features or use of this software 26 * must display the following acknowledgement: 27 * This product includes software developed by the University of 28 * California, Berkeley and its contributors. 29 * 4. Neither the name of the University nor the names of its contributors 30 * may be used to endorse or promote products derived from this software 31 * without specific prior written permission. 32 * 33 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 34 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 35 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 36 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 37 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 38 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 39 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 40 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 41 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 42 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 43 * SUCH DAMAGE. 44 * 45 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 46 * $FreeBSD: src/sys/i386/i386/pmap.c,v 1.250.2.18 2002/03/06 22:48:53 silby Exp $ 47 */ 48 49 /* 50 * Manages physical address maps. 
51 */ 52 53 #if JG 54 #include "opt_pmap.h" 55 #endif 56 #include "opt_msgbuf.h" 57 58 #include <sys/param.h> 59 #include <sys/systm.h> 60 #include <sys/kernel.h> 61 #include <sys/proc.h> 62 #include <sys/msgbuf.h> 63 #include <sys/vmmeter.h> 64 #include <sys/mman.h> 65 #include <sys/vmspace.h> 66 67 #include <vm/vm.h> 68 #include <vm/vm_param.h> 69 #include <sys/sysctl.h> 70 #include <sys/lock.h> 71 #include <vm/vm_kern.h> 72 #include <vm/vm_page.h> 73 #include <vm/vm_map.h> 74 #include <vm/vm_object.h> 75 #include <vm/vm_extern.h> 76 #include <vm/vm_pageout.h> 77 #include <vm/vm_pager.h> 78 #include <vm/vm_zone.h> 79 80 #include <sys/user.h> 81 #include <sys/thread2.h> 82 #include <sys/sysref2.h> 83 #include <sys/spinlock2.h> 84 #include <vm/vm_page2.h> 85 86 #include <machine/cputypes.h> 87 #include <machine/md_var.h> 88 #include <machine/specialreg.h> 89 #include <machine/smp.h> 90 #include <machine/globaldata.h> 91 #include <machine/pmap.h> 92 #include <machine/pmap_inval.h> 93 94 #include <ddb/ddb.h> 95 96 #include <stdio.h> 97 #include <assert.h> 98 #include <stdlib.h> 99 #include <pthread.h> 100 101 #define PMAP_KEEP_PDIRS 102 #ifndef PMAP_SHPGPERPROC 103 #define PMAP_SHPGPERPROC 1000 104 #endif 105 106 #if defined(DIAGNOSTIC) 107 #define PMAP_DIAGNOSTIC 108 #endif 109 110 #define MINPV 2048 111 112 #if !defined(PMAP_DIAGNOSTIC) 113 #define PMAP_INLINE __inline 114 #else 115 #define PMAP_INLINE 116 #endif 117 118 /* 119 * Get PDEs and PTEs for user/kernel address space 120 */ 121 static pd_entry_t *pmap_pde(pmap_t pmap, vm_offset_t va); 122 #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) 123 124 #define pmap_pde_v(pte) ((*(pd_entry_t *)pte & VPTE_V) != 0) 125 #define pmap_pte_w(pte) ((*(pt_entry_t *)pte & VPTE_WIRED) != 0) 126 #define pmap_pte_m(pte) ((*(pt_entry_t *)pte & VPTE_M) != 0) 127 #define pmap_pte_u(pte) ((*(pt_entry_t *)pte & VPTE_A) != 0) 128 #define pmap_pte_v(pte) ((*(pt_entry_t *)pte & VPTE_V) != 0) 129 130 /* 131 * Given a map and a machine independent protection code, 132 * convert to a vax protection code. 133 */ 134 #define pte_prot(m, p) \ 135 (protection_codes[p & (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)]) 136 static int protection_codes[8]; 137 138 struct pmap kernel_pmap; 139 static TAILQ_HEAD(,pmap) pmap_list = TAILQ_HEAD_INITIALIZER(pmap_list); 140 141 static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? 
 */

static vm_object_t kptobj;

static int nkpt;

static uint64_t KPDphys;	/* phys addr of kernel level 2 */
uint64_t KPDPphys;		/* phys addr of kernel level 3 */
uint64_t KPML4phys;		/* phys addr of kernel level 4 */

extern int vmm_enabled;
extern void *vkernel_stack;

/*
 * Data for the pv entry allocation mechanism
 */
static vm_zone_t pvzone;
static struct vm_zone pvzone_store;
static struct vm_object pvzone_obj;
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
static int pmap_pagedaemon_waken = 0;
static struct pv_entry *pvinit;

/*
 * All those kernel PT submaps that BSD is so fond of
 */
pt_entry_t *CMAP1 = NULL, *ptmmap;
caddr_t CADDR1 = NULL;
static pt_entry_t *msgbufmap;

uint64_t KPTphys;

static PMAP_INLINE void	free_pv_entry (pv_entry_t pv);
static pv_entry_t get_pv_entry (void);
static void	i386_protection_init (void);
static __inline void	pmap_clearbit (vm_page_t m, int bit);

static void	pmap_remove_all (vm_page_t m);
static int pmap_remove_pte (struct pmap *pmap, pt_entry_t *ptq,
			vm_offset_t sva);
static void pmap_remove_page (struct pmap *pmap, vm_offset_t va);
static int pmap_remove_entry (struct pmap *pmap, vm_page_t m,
			vm_offset_t va);
static boolean_t pmap_testbit (vm_page_t m, int bit);
static void pmap_insert_entry (pmap_t pmap, vm_offset_t va,
			vm_page_t mpte, vm_page_t m);

static vm_page_t pmap_allocpte (pmap_t pmap, vm_offset_t va);

static int pmap_release_free_page (pmap_t pmap, vm_page_t p);
static vm_page_t _pmap_allocpte (pmap_t pmap, vm_pindex_t ptepindex);
#if JGPMAP32
static pt_entry_t *pmap_pte_quick (pmap_t pmap, vm_offset_t va);
#endif
static vm_page_t pmap_page_lookup (vm_object_t object, vm_pindex_t pindex);
static int pmap_unuse_pt (pmap_t, vm_offset_t, vm_page_t);

/*
 * pmap_pte_quick:
 *
 * Super fast pmap_pte routine best used when scanning the pv lists.
 * This eliminates many coarse-grained invltlb calls.  Note that many of
 * the pv list scans are across different pmaps and it is very wasteful
 * to do an entire invltlb when checking a single mapping.
 *
 * Should only be called while in a critical section.
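 *
 * A rough illustration (comment only, not compiled) of the kind of
 * pv-list scan this helper is meant for; each pv entry records the
 * pmap and va of one mapping of the page 'm':
 *
 *	pv_entry_t pv;
 *
 *	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 *		pt_entry_t *pte = pmap_pte(pv->pv_pmap, pv->pv_va);
 *		if (pte && (*pte & VPTE_V))
 *			... examine or adjust the vpte ...
 *	}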
207 */ 208 #if JGPMAP32 209 static __inline pt_entry_t *pmap_pte(pmap_t pmap, vm_offset_t va); 210 211 static pt_entry_t * 212 pmap_pte_quick(pmap_t pmap, vm_offset_t va) 213 { 214 return pmap_pte(pmap, va); 215 } 216 #endif 217 218 /* Return a non-clipped PD index for a given VA */ 219 static __inline vm_pindex_t 220 pmap_pde_pindex(vm_offset_t va) 221 { 222 return va >> PDRSHIFT; 223 } 224 225 /* Return various clipped indexes for a given VA */ 226 static __inline vm_pindex_t 227 pmap_pte_index(vm_offset_t va) 228 { 229 230 return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1)); 231 } 232 233 static __inline vm_pindex_t 234 pmap_pde_index(vm_offset_t va) 235 { 236 237 return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1)); 238 } 239 240 static __inline vm_pindex_t 241 pmap_pdpe_index(vm_offset_t va) 242 { 243 244 return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1)); 245 } 246 247 static __inline vm_pindex_t 248 pmap_pml4e_index(vm_offset_t va) 249 { 250 251 return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1)); 252 } 253 254 /* Return a pointer to the PML4 slot that corresponds to a VA */ 255 static __inline pml4_entry_t * 256 pmap_pml4e(pmap_t pmap, vm_offset_t va) 257 { 258 259 return (&pmap->pm_pml4[pmap_pml4e_index(va)]); 260 } 261 262 /* Return a pointer to the PDP slot that corresponds to a VA */ 263 static __inline pdp_entry_t * 264 pmap_pml4e_to_pdpe(pml4_entry_t *pml4e, vm_offset_t va) 265 { 266 pdp_entry_t *pdpe; 267 268 pdpe = (pdp_entry_t *)PHYS_TO_DMAP(*pml4e & VPTE_FRAME); 269 return (&pdpe[pmap_pdpe_index(va)]); 270 } 271 272 /* Return a pointer to the PDP slot that corresponds to a VA */ 273 static __inline pdp_entry_t * 274 pmap_pdpe(pmap_t pmap, vm_offset_t va) 275 { 276 pml4_entry_t *pml4e; 277 278 pml4e = pmap_pml4e(pmap, va); 279 if ((*pml4e & VPTE_V) == 0) 280 return NULL; 281 return (pmap_pml4e_to_pdpe(pml4e, va)); 282 } 283 284 /* Return a pointer to the PD slot that corresponds to a VA */ 285 static __inline pd_entry_t * 286 pmap_pdpe_to_pde(pdp_entry_t *pdpe, vm_offset_t va) 287 { 288 pd_entry_t *pde; 289 290 pde = (pd_entry_t *)PHYS_TO_DMAP(*pdpe & VPTE_FRAME); 291 return (&pde[pmap_pde_index(va)]); 292 } 293 294 /* Return a pointer to the PD slot that corresponds to a VA */ 295 static __inline pd_entry_t * 296 pmap_pde(pmap_t pmap, vm_offset_t va) 297 { 298 pdp_entry_t *pdpe; 299 300 pdpe = pmap_pdpe(pmap, va); 301 if (pdpe == NULL || (*pdpe & VPTE_V) == 0) 302 return NULL; 303 return (pmap_pdpe_to_pde(pdpe, va)); 304 } 305 306 /* Return a pointer to the PT slot that corresponds to a VA */ 307 static __inline pt_entry_t * 308 pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va) 309 { 310 pt_entry_t *pte; 311 312 pte = (pt_entry_t *)PHYS_TO_DMAP(*pde & VPTE_FRAME); 313 return (&pte[pmap_pte_index(va)]); 314 } 315 316 /* Return a pointer to the PT slot that corresponds to a VA */ 317 static __inline pt_entry_t * 318 pmap_pte(pmap_t pmap, vm_offset_t va) 319 { 320 pd_entry_t *pde; 321 322 pde = pmap_pde(pmap, va); 323 if (pde == NULL || (*pde & VPTE_V) == 0) 324 return NULL; 325 if ((*pde & VPTE_PS) != 0) /* compat with i386 pmap_pte() */ 326 return ((pt_entry_t *)pde); 327 return (pmap_pde_to_pte(pde, va)); 328 } 329 330 331 #if JGV 332 PMAP_INLINE pt_entry_t * 333 vtopte(vm_offset_t va) 334 { 335 uint64_t mask = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + 336 NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1); 337 338 return (PTmap + ((va >> PAGE_SHIFT) & mask)); 339 } 340 341 static __inline pd_entry_t * 342 vtopde(vm_offset_t va) 343 { 344 uint64_t mask = ((1ul << 
(NPDEPGSHIFT + NPDPEPGSHIFT + 345 NPML4EPGSHIFT)) - 1); 346 347 return (PDmap + ((va >> PDRSHIFT) & mask)); 348 } 349 #else 350 static PMAP_INLINE pt_entry_t * 351 vtopte(vm_offset_t va) 352 { 353 pt_entry_t *x; 354 x = pmap_pte(&kernel_pmap, va); 355 assert(x != NULL); 356 return x; 357 } 358 359 static __inline pd_entry_t * 360 vtopde(vm_offset_t va) 361 { 362 pd_entry_t *x; 363 x = pmap_pde(&kernel_pmap, va); 364 assert(x != NULL); 365 return x; 366 } 367 #endif 368 369 static uint64_t 370 allocpages(vm_paddr_t *firstaddr, int n) 371 { 372 uint64_t ret; 373 374 ret = *firstaddr; 375 #if JGV 376 bzero((void *)ret, n * PAGE_SIZE); 377 #endif 378 *firstaddr += n * PAGE_SIZE; 379 return (ret); 380 } 381 382 static void 383 create_dmap_vmm(vm_paddr_t *firstaddr) 384 { 385 void *stack_addr; 386 int pml4_stack_index; 387 int pdp_stack_index; 388 int pd_stack_index; 389 long i,j; 390 int regs[4]; 391 int amd_feature; 392 393 uint64_t KPDP_DMAP_phys = allocpages(firstaddr, NDMPML4E); 394 uint64_t KPDP_VSTACK_phys = allocpages(firstaddr, 1); 395 uint64_t KPD_VSTACK_phys = allocpages(firstaddr, 1); 396 397 pml4_entry_t *KPML4virt = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); 398 pdp_entry_t *KPDP_DMAP_virt = (pdp_entry_t *)PHYS_TO_DMAP(KPDP_DMAP_phys); 399 pdp_entry_t *KPDP_VSTACK_virt = (pdp_entry_t *)PHYS_TO_DMAP(KPDP_VSTACK_phys); 400 pd_entry_t *KPD_VSTACK_virt = (pd_entry_t *)PHYS_TO_DMAP(KPD_VSTACK_phys); 401 402 bzero(KPDP_DMAP_virt, NDMPML4E * PAGE_SIZE); 403 bzero(KPDP_VSTACK_virt, 1 * PAGE_SIZE); 404 bzero(KPD_VSTACK_virt, 1 * PAGE_SIZE); 405 406 do_cpuid(0x80000001, regs); 407 amd_feature = regs[3]; 408 409 /* Build the mappings for the first 512GB */ 410 if (amd_feature & AMDID_PAGE1GB) { 411 /* In pages of 1 GB, if supported */ 412 for (i = 0; i < NPDPEPG; i++) { 413 KPDP_DMAP_virt[i] = ((uint64_t)i << PDPSHIFT); 414 KPDP_DMAP_virt[i] |= VPTE_RW | VPTE_V | VPTE_PS | VPTE_U; 415 } 416 } else { 417 /* In page of 2MB, otherwise */ 418 for (i = 0; i < NPDPEPG; i++) { 419 uint64_t KPD_DMAP_phys = allocpages(firstaddr, 1); 420 pd_entry_t *KPD_DMAP_virt = (pd_entry_t *)PHYS_TO_DMAP(KPD_DMAP_phys); 421 422 bzero(KPD_DMAP_virt, PAGE_SIZE); 423 424 KPDP_DMAP_virt[i] = KPD_DMAP_phys; 425 KPDP_DMAP_virt[i] |= VPTE_RW | VPTE_V | VPTE_U; 426 427 /* For each PD, we have to allocate NPTEPG PT */ 428 for (j = 0; j < NPTEPG; j++) { 429 KPD_DMAP_virt[j] = (i << PDPSHIFT) | (j << PDRSHIFT); 430 KPD_DMAP_virt[j] |= VPTE_RW | VPTE_V | VPTE_PS | VPTE_U; 431 } 432 } 433 } 434 435 /* DMAP for the first 512G */ 436 KPML4virt[0] = KPDP_DMAP_phys; 437 KPML4virt[0] |= VPTE_RW | VPTE_V | VPTE_U; 438 439 /* create a 2 MB map of the new stack */ 440 pml4_stack_index = (uint64_t)&stack_addr >> PML4SHIFT; 441 KPML4virt[pml4_stack_index] = KPDP_VSTACK_phys; 442 KPML4virt[pml4_stack_index] |= VPTE_RW | VPTE_V | VPTE_U; 443 444 pdp_stack_index = ((uint64_t)&stack_addr & PML4MASK) >> PDPSHIFT; 445 KPDP_VSTACK_virt[pdp_stack_index] = KPD_VSTACK_phys; 446 KPDP_VSTACK_virt[pdp_stack_index] |= VPTE_RW | VPTE_V | VPTE_U; 447 448 pd_stack_index = ((uint64_t)&stack_addr & PDPMASK) >> PDRSHIFT; 449 KPD_VSTACK_virt[pd_stack_index] = (uint64_t) vkernel_stack; 450 KPD_VSTACK_virt[pd_stack_index] |= VPTE_RW | VPTE_V | VPTE_U | VPTE_PS; 451 } 452 453 static void 454 create_pagetables(vm_paddr_t *firstaddr, int64_t ptov_offset) 455 { 456 int i; 457 pml4_entry_t *KPML4virt; 458 pdp_entry_t *KPDPvirt; 459 pd_entry_t *KPDvirt; 460 pt_entry_t *KPTvirt; 461 int kpml4i = pmap_pml4e_index(ptov_offset); 462 int kpdpi = 
pmap_pdpe_index(ptov_offset);
	int kpdi = pmap_pde_index(ptov_offset);

	/*
	 * Calculate NKPT - number of kernel page tables.  We have to
	 * accommodate preallocation of the vm_page_array, dump bitmap,
	 * MSGBUF_SIZE, and other stuff.  Be generous.
	 *
	 * Maxmem is in pages.
	 */
	nkpt = (Maxmem * (sizeof(struct vm_page) * 2) + MSGBUF_SIZE) / NBPDR;

	/*
	 * Allocate pages
	 */
	KPML4phys = allocpages(firstaddr, 1);
	KPDPphys = allocpages(firstaddr, NKPML4E);
	KPDphys = allocpages(firstaddr, NKPDPE);
	KPTphys = allocpages(firstaddr, nkpt);

	KPML4virt = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys);
	KPDPvirt = (pdp_entry_t *)PHYS_TO_DMAP(KPDPphys);
	KPDvirt = (pd_entry_t *)PHYS_TO_DMAP(KPDphys);
	KPTvirt = (pt_entry_t *)PHYS_TO_DMAP(KPTphys);

	bzero(KPML4virt, 1 * PAGE_SIZE);
	bzero(KPDPvirt, NKPML4E * PAGE_SIZE);
	bzero(KPDvirt, NKPDPE * PAGE_SIZE);
	bzero(KPTvirt, nkpt * PAGE_SIZE);

	/* Now map the page tables at their location within PTmap */
	for (i = 0; i < nkpt; i++) {
		KPDvirt[i + kpdi] = KPTphys + (i << PAGE_SHIFT);
		KPDvirt[i + kpdi] |= VPTE_RW | VPTE_V | VPTE_U;
	}

	/* And connect up the PD to the PDP */
	for (i = 0; i < NKPDPE; i++) {
		KPDPvirt[i + kpdpi] = KPDphys + (i << PAGE_SHIFT);
		KPDPvirt[i + kpdpi] |= VPTE_RW | VPTE_V | VPTE_U;
	}

	/* And recursively map PML4 to itself in order to get PTmap */
	KPML4virt[PML4PML4I] = KPML4phys;
	KPML4virt[PML4PML4I] |= VPTE_RW | VPTE_V | VPTE_U;

	/* Connect the KVA slot up to the PML4 */
	KPML4virt[kpml4i] = KPDPphys;
	KPML4virt[kpml4i] |= VPTE_RW | VPTE_V | VPTE_U;
}

/*
 * Typically used to initialize a fictitious page by vm/device_pager.c
 */
void
pmap_page_init(struct vm_page *m)
{
	vm_page_init(m);
	TAILQ_INIT(&m->md.pv_list);
}

/*
 * Bootstrap the system enough to run with virtual memory.
 *
 * On the i386 this is called after mapping has already been enabled
 * and just syncs the pmap module with what has already been done.
 * [We can't call it easily with mapping off since the kernel is not
 * mapped with PA == VA, hence we would have to relocate every address
 * from the linked base (virtual) address "KERNBASE" to the actual
 * (physical) address starting relative to 0]
 */
void
pmap_bootstrap(vm_paddr_t *firstaddr, int64_t ptov_offset)
{
	vm_offset_t va;
	pt_entry_t *pte;

	/*
	 * Create an initial set of page tables to run the kernel in.
	 */
	create_pagetables(firstaddr, ptov_offset);

	/* Create the DMAP for the VMM */
	if (vmm_enabled) {
		create_dmap_vmm(firstaddr);
	}

	virtual_start = KvaStart;
	virtual_end = KvaEnd;

	/*
	 * Initialize protection array.
	 */
	i386_protection_init();

	/*
	 * The kernel's pmap is statically allocated so we don't have to use
	 * pmap_create, which is unlikely to work correctly at this part of
	 * the boot sequence (XXX and which no longer exists).
	 *
	 * The kernel_pmap's pm_pteobj is used only for locking and not
	 * for mmu pages.
563 */ 564 kernel_pmap.pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); 565 kernel_pmap.pm_count = 1; 566 /* don't allow deactivation */ 567 kernel_pmap.pm_active = (cpumask_t)-1 & ~CPUMASK_LOCK; 568 kernel_pmap.pm_pteobj = NULL; /* see pmap_init */ 569 TAILQ_INIT(&kernel_pmap.pm_pvlist); 570 TAILQ_INIT(&kernel_pmap.pm_pvlist_free); 571 lwkt_token_init(&kernel_pmap.pm_token, "kpmap_tok"); 572 spin_init(&kernel_pmap.pm_spin); 573 574 /* 575 * Reserve some special page table entries/VA space for temporary 576 * mapping of pages. 577 */ 578 #define SYSMAP(c, p, v, n) \ 579 v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); 580 581 va = virtual_start; 582 pte = pmap_pte(&kernel_pmap, va); 583 /* 584 * CMAP1/CMAP2 are used for zeroing and copying pages. 585 */ 586 SYSMAP(caddr_t, CMAP1, CADDR1, 1) 587 588 #if JGV 589 /* 590 * Crashdump maps. 591 */ 592 SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS); 593 #endif 594 595 /* 596 * ptvmmap is used for reading arbitrary physical pages via 597 * /dev/mem. 598 */ 599 SYSMAP(caddr_t, ptmmap, ptvmmap, 1) 600 601 /* 602 * msgbufp is used to map the system message buffer. 603 * XXX msgbufmap is not used. 604 */ 605 SYSMAP(struct msgbuf *, msgbufmap, msgbufp, 606 atop(round_page(MSGBUF_SIZE))) 607 608 virtual_start = va; 609 610 *CMAP1 = 0; 611 /* Not ready to do an invltlb yet for VMM*/ 612 if (!vmm_enabled) 613 cpu_invltlb(); 614 615 } 616 617 /* 618 * Initialize the pmap module. 619 * Called by vm_init, to initialize any structures that the pmap 620 * system needs to map virtual memory. 621 * pmap_init has been enhanced to support in a fairly consistant 622 * way, discontiguous physical memory. 623 */ 624 void 625 pmap_init(void) 626 { 627 int i; 628 int initial_pvs; 629 630 /* 631 * object for kernel page table pages 632 */ 633 /* JG I think the number can be arbitrary */ 634 kptobj = vm_object_allocate(OBJT_DEFAULT, 5); 635 kernel_pmap.pm_pteobj = kptobj; 636 637 /* 638 * Allocate memory for random pmap data structures. Includes the 639 * pv_head_table. 640 */ 641 for(i = 0; i < vm_page_array_size; i++) { 642 vm_page_t m; 643 644 m = &vm_page_array[i]; 645 TAILQ_INIT(&m->md.pv_list); 646 m->md.pv_list_count = 0; 647 } 648 649 /* 650 * init the pv free list 651 */ 652 initial_pvs = vm_page_array_size; 653 if (initial_pvs < MINPV) 654 initial_pvs = MINPV; 655 pvzone = &pvzone_store; 656 pvinit = (struct pv_entry *) kmem_alloc(&kernel_map, 657 initial_pvs * sizeof (struct pv_entry)); 658 zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit, 659 initial_pvs); 660 661 /* 662 * Now it is safe to enable pv_table recording. 663 */ 664 pmap_initialized = TRUE; 665 } 666 667 /* 668 * Initialize the address space (zone) for the pv_entries. Set a 669 * high water mark so that the system can recover from excessive 670 * numbers of pv entries. 671 */ 672 void 673 pmap_init2(void) 674 { 675 int shpgperproc = PMAP_SHPGPERPROC; 676 677 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); 678 pv_entry_max = shpgperproc * maxproc + vm_page_array_size; 679 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); 680 pv_entry_high_water = 9 * (pv_entry_max / 10); 681 zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1); 682 } 683 684 685 /*************************************************** 686 * Low level helper routines..... 687 ***************************************************/ 688 689 /* 690 * The modification bit is not tracked for any pages in this range. 
XXX
 * such pages in this map should always use pmap_k*() functions and not
 * be managed anyhow.
 *
 * XXX User and kernel address spaces are independent for virtual kernels,
 * this function only applies to the kernel pmap.
 */
static int
pmap_track_modified(pmap_t pmap, vm_offset_t va)
{
	if (pmap != &kernel_pmap)
		return 1;
	if ((va < clean_sva) || (va >= clean_eva))
		return 1;
	else
		return 0;
}

/*
 * Extract the physical page address associated with the map/VA pair.
 *
 * No requirements.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t rtval;
	pt_entry_t *pte;
	pd_entry_t pde, *pdep;

	lwkt_gettoken(&vm_token);
	rtval = 0;
	pdep = pmap_pde(pmap, va);
	if (pdep != NULL) {
		pde = *pdep;
		if (pde) {
			if ((pde & VPTE_PS) != 0) {
				/* JGV */
				rtval = (pde & PG_PS_FRAME) | (va & PDRMASK);
			} else {
				pte = pmap_pde_to_pte(pdep, va);
				rtval = (*pte & VPTE_FRAME) | (va & PAGE_MASK);
			}
		}
	}
	lwkt_reltoken(&vm_token);
	return rtval;
}

/*
 * Similar to extract but checks protections, SMP-friendly short-cut for
 * vm_fault_page[_quick]().
 */
vm_page_t
pmap_fault_page_quick(pmap_t pmap __unused, vm_offset_t vaddr __unused,
		      vm_prot_t prot __unused)
{
	return(NULL);
}

/*
 * Routine: pmap_kextract
 * Function:
 *	Extract the physical page address associated with the given
 *	kernel virtual address.
 */
vm_paddr_t
pmap_kextract(vm_offset_t va)
{
	pd_entry_t pde;
	vm_paddr_t pa;

	KKASSERT(va >= KvaStart && va < KvaEnd);

	/*
	 * The DMAP region is not included in [KvaStart, KvaEnd)
	 */
#if 0
	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
		pa = DMAP_TO_PHYS(va);
	} else {
#endif
	pde = *vtopde(va);
	if (pde & VPTE_PS) {
		/* JGV */
		pa = (pde & PG_PS_FRAME) | (va & PDRMASK);
	} else {
		/*
		 * Beware of a concurrent promotion that changes the
		 * PDE at this point!  For example, vtopte() must not
		 * be used to access the PTE because it would use the
		 * new PDE.  It is, however, safe to use the old PDE
		 * because the page table page is preserved by the
		 * promotion.
		 */
		pa = *pmap_pde_to_pte(&pde, va);
		pa = (pa & VPTE_FRAME) | (va & PAGE_MASK);
	}
#if 0
	}
#endif
	return pa;
}

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

/*
 * Enter a mapping into kernel_pmap.  Mappings created in this fashion
 * are not managed.  Mappings must be immediately accessible on all cpus.
 *
 * Call pmap_inval_pte() to invalidate the virtual pte and clean out the
 * real pmap and handle related races before storing the new vpte.
 */
void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{
	pt_entry_t *pte;
	pt_entry_t npte;

	KKASSERT(va >= KvaStart && va < KvaEnd);
	npte = pa | VPTE_RW | VPTE_V | VPTE_U;
	pte = vtopte(va);
	if (*pte & VPTE_V)
		pmap_inval_pte(pte, &kernel_pmap, va);
	*pte = npte;
}

/*
 * Enter an unmanaged KVA mapping for the private use of the current
 * cpu only.  pmap_kenter_sync() may be called to make the mapping usable
 * by other cpus.
 *
 * It is illegal for the mapping to be accessed by other cpus unless
 * pmap_kenter_sync*() is called.
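 *
 * A usage sketch (comment only): the cpu that creates the mapping may
 * use it immediately, any other cpu must resynchronize first, e.g.
 *
 *	pmap_kenter_quick(va, pa);	(creating cpu, immediate use ok)
 *	...
 *	pmap_kenter_sync_quick(va);	(another cpu, before its first use)
 *	...
 *	pmap_kremove_quick(va);		(tear the mapping back down)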
826 */ 827 void 828 pmap_kenter_quick(vm_offset_t va, vm_paddr_t pa) 829 { 830 pt_entry_t *pte; 831 pt_entry_t npte; 832 833 KKASSERT(va >= KvaStart && va < KvaEnd); 834 835 npte = (vpte_t)pa | VPTE_RW | VPTE_V | VPTE_U; 836 pte = vtopte(va); 837 838 if (*pte & VPTE_V) 839 pmap_inval_pte_quick(pte, &kernel_pmap, va); 840 *pte = npte; 841 } 842 843 /* 844 * Synchronize a kvm mapping originally made for the private use on 845 * some other cpu so it can be used on our cpu. Turns out to be the 846 * same madvise() call, because we have to sync the real pmaps anyway. 847 * 848 * XXX add MADV_RESYNC to improve performance. 849 */ 850 void 851 pmap_kenter_sync_quick(vm_offset_t va) 852 { 853 cpu_invlpg((void *)va); 854 } 855 856 /* 857 * Remove an unmanaged mapping created with pmap_kenter*(). 858 */ 859 void 860 pmap_kremove(vm_offset_t va) 861 { 862 pt_entry_t *pte; 863 864 KKASSERT(va >= KvaStart && va < KvaEnd); 865 866 pte = vtopte(va); 867 if (*pte & VPTE_V) 868 pmap_inval_pte(pte, &kernel_pmap, va); 869 *pte = 0; 870 } 871 872 /* 873 * Remove an unmanaged mapping created with pmap_kenter*() but synchronize 874 * only with this cpu. 875 * 876 * Unfortunately because we optimize new entries by testing VPTE_V later 877 * on, we actually still have to synchronize with all the cpus. XXX maybe 878 * store a junk value and test against 0 in the other places instead? 879 */ 880 void 881 pmap_kremove_quick(vm_offset_t va) 882 { 883 pt_entry_t *pte; 884 885 KKASSERT(va >= KvaStart && va < KvaEnd); 886 887 pte = vtopte(va); 888 if (*pte & VPTE_V) 889 pmap_inval_pte(pte, &kernel_pmap, va); /* NOT _quick */ 890 *pte = 0; 891 } 892 893 /* 894 * Used to map a range of physical addresses into kernel 895 * virtual address space. 896 * 897 * For now, VM is already on, we only need to map the 898 * specified memory. 899 */ 900 vm_offset_t 901 pmap_map(vm_offset_t *virtp, vm_paddr_t start, vm_paddr_t end, int prot) 902 { 903 return PHYS_TO_DMAP(start); 904 } 905 906 907 /* 908 * Map a set of unmanaged VM pages into KVM. 909 */ 910 void 911 pmap_qenter(vm_offset_t va, vm_page_t *m, int count) 912 { 913 vm_offset_t end_va; 914 915 end_va = va + count * PAGE_SIZE; 916 KKASSERT(va >= KvaStart && end_va < KvaEnd); 917 918 while (va < end_va) { 919 pt_entry_t *pte; 920 921 pte = vtopte(va); 922 if (*pte & VPTE_V) 923 pmap_inval_pte(pte, &kernel_pmap, va); 924 *pte = VM_PAGE_TO_PHYS(*m) | VPTE_RW | VPTE_V | VPTE_U; 925 va += PAGE_SIZE; 926 m++; 927 } 928 } 929 930 /* 931 * Undo the effects of pmap_qenter*(). 932 */ 933 void 934 pmap_qremove(vm_offset_t va, int count) 935 { 936 vm_offset_t end_va; 937 938 end_va = va + count * PAGE_SIZE; 939 KKASSERT(va >= KvaStart && end_va < KvaEnd); 940 941 while (va < end_va) { 942 pt_entry_t *pte; 943 944 pte = vtopte(va); 945 if (*pte & VPTE_V) 946 pmap_inval_pte(pte, &kernel_pmap, va); 947 *pte = 0; 948 va += PAGE_SIZE; 949 } 950 } 951 952 /* 953 * This routine works like vm_page_lookup() but also blocks as long as the 954 * page is busy. This routine does not busy the page it returns. 955 * 956 * Unless the caller is managing objects whos pages are in a known state, 957 * the call should be made with a critical section held so the page's object 958 * association remains valid on return. 
959 */ 960 static vm_page_t 961 pmap_page_lookup(vm_object_t object, vm_pindex_t pindex) 962 { 963 vm_page_t m; 964 965 ASSERT_LWKT_TOKEN_HELD(vm_object_token(object)); 966 m = vm_page_lookup_busy_wait(object, pindex, FALSE, "pplookp"); 967 968 return(m); 969 } 970 971 /* 972 * Create a new thread and optionally associate it with a (new) process. 973 * NOTE! the new thread's cpu may not equal the current cpu. 974 */ 975 void 976 pmap_init_thread(thread_t td) 977 { 978 /* enforce pcb placement */ 979 td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_size) - 1; 980 td->td_savefpu = &td->td_pcb->pcb_save; 981 td->td_sp = (char *)td->td_pcb - 16; /* JG is -16 needed on x86_64? */ 982 } 983 984 /* 985 * This routine directly affects the fork perf for a process. 986 */ 987 void 988 pmap_init_proc(struct proc *p) 989 { 990 } 991 992 /*************************************************** 993 * Page table page management routines..... 994 ***************************************************/ 995 996 static __inline int pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, 997 vm_page_t m); 998 999 /* 1000 * This routine unholds page table pages, and if the hold count 1001 * drops to zero, then it decrements the wire count. 1002 * 1003 * We must recheck that this is the last hold reference after busy-sleeping 1004 * on the page. 1005 */ 1006 static int 1007 _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m) 1008 { 1009 vm_page_busy_wait(m, FALSE, "pmuwpt"); 1010 KASSERT(m->queue == PQ_NONE, 1011 ("_pmap_unwire_pte_hold: %p->queue != PQ_NONE", m)); 1012 1013 if (m->hold_count == 1) { 1014 /* 1015 * Unmap the page table page. 1016 */ 1017 //abort(); /* JG */ 1018 /* pmap_inval_add(info, pmap, -1); */ 1019 1020 if (m->pindex >= (NUPDE + NUPDPE)) { 1021 /* PDP page */ 1022 pml4_entry_t *pml4; 1023 pml4 = pmap_pml4e(pmap, va); 1024 *pml4 = 0; 1025 } else if (m->pindex >= NUPDE) { 1026 /* PD page */ 1027 pdp_entry_t *pdp; 1028 pdp = pmap_pdpe(pmap, va); 1029 *pdp = 0; 1030 } else { 1031 /* PT page */ 1032 pd_entry_t *pd; 1033 pd = pmap_pde(pmap, va); 1034 *pd = 0; 1035 } 1036 1037 KKASSERT(pmap->pm_stats.resident_count > 0); 1038 --pmap->pm_stats.resident_count; 1039 1040 if (pmap->pm_ptphint == m) 1041 pmap->pm_ptphint = NULL; 1042 1043 if (m->pindex < NUPDE) { 1044 /* We just released a PT, unhold the matching PD */ 1045 vm_page_t pdpg; 1046 1047 pdpg = PHYS_TO_VM_PAGE(*pmap_pdpe(pmap, va) & VPTE_FRAME); 1048 pmap_unwire_pte_hold(pmap, va, pdpg); 1049 } 1050 if (m->pindex >= NUPDE && m->pindex < (NUPDE + NUPDPE)) { 1051 /* We just released a PD, unhold the matching PDP */ 1052 vm_page_t pdppg; 1053 1054 pdppg = PHYS_TO_VM_PAGE(*pmap_pml4e(pmap, va) & VPTE_FRAME); 1055 pmap_unwire_pte_hold(pmap, va, pdppg); 1056 } 1057 1058 /* 1059 * This was our last hold, the page had better be unwired 1060 * after we decrement wire_count. 1061 * 1062 * FUTURE NOTE: shared page directory page could result in 1063 * multiple wire counts. 
1064 */ 1065 vm_page_unhold(m); 1066 --m->wire_count; 1067 KKASSERT(m->wire_count == 0); 1068 atomic_add_int(&vmstats.v_wire_count, -1); 1069 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 1070 vm_page_flash(m); 1071 vm_page_free_zero(m); 1072 return 1; 1073 } else { 1074 KKASSERT(m->hold_count > 1); 1075 vm_page_unhold(m); 1076 vm_page_wakeup(m); 1077 return 0; 1078 } 1079 } 1080 1081 static __inline int 1082 pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m) 1083 { 1084 KKASSERT(m->hold_count > 0); 1085 if (m->hold_count > 1) { 1086 vm_page_unhold(m); 1087 return 0; 1088 } else { 1089 return _pmap_unwire_pte_hold(pmap, va, m); 1090 } 1091 } 1092 1093 /* 1094 * After removing a page table entry, this routine is used to 1095 * conditionally free the page, and manage the hold/wire counts. 1096 */ 1097 static int 1098 pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) 1099 { 1100 /* JG Use FreeBSD/amd64 or FreeBSD/i386 ptepde approaches? */ 1101 vm_pindex_t ptepindex; 1102 1103 ASSERT_LWKT_TOKEN_HELD(vm_object_token(pmap->pm_pteobj)); 1104 1105 if (mpte == NULL) { 1106 /* 1107 * page table pages in the kernel_pmap are not managed. 1108 */ 1109 if (pmap == &kernel_pmap) 1110 return(0); 1111 ptepindex = pmap_pde_pindex(va); 1112 if (pmap->pm_ptphint && 1113 (pmap->pm_ptphint->pindex == ptepindex)) { 1114 mpte = pmap->pm_ptphint; 1115 } else { 1116 mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex); 1117 pmap->pm_ptphint = mpte; 1118 vm_page_wakeup(mpte); 1119 } 1120 } 1121 1122 return pmap_unwire_pte_hold(pmap, va, mpte); 1123 } 1124 1125 /* 1126 * Initialize pmap0/vmspace0 . Since process 0 never enters user mode we 1127 * just dummy it up so it works well enough for fork(). 1128 * 1129 * In DragonFly, process pmaps may only be used to manipulate user address 1130 * space, never kernel address space. 1131 */ 1132 void 1133 pmap_pinit0(struct pmap *pmap) 1134 { 1135 pmap_pinit(pmap); 1136 } 1137 1138 /* 1139 * Initialize a preallocated and zeroed pmap structure, 1140 * such as one in a vmspace structure. 1141 */ 1142 void 1143 pmap_pinit(struct pmap *pmap) 1144 { 1145 vm_page_t ptdpg; 1146 1147 /* 1148 * No need to allocate page table space yet but we do need a valid 1149 * page directory table. 1150 */ 1151 if (pmap->pm_pml4 == NULL) { 1152 pmap->pm_pml4 = 1153 (pml4_entry_t *)kmem_alloc_pageable(&kernel_map, PAGE_SIZE); 1154 } 1155 1156 /* 1157 * Allocate an object for the ptes 1158 */ 1159 if (pmap->pm_pteobj == NULL) 1160 pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, NUPDE + NUPDPE + PML4PML4I + 1); 1161 1162 /* 1163 * Allocate the page directory page, unless we already have 1164 * one cached. If we used the cached page the wire_count will 1165 * already be set appropriately. 
1166 */ 1167 if ((ptdpg = pmap->pm_pdirm) == NULL) { 1168 ptdpg = vm_page_grab(pmap->pm_pteobj, 1169 NUPDE + NUPDPE + PML4PML4I, 1170 VM_ALLOC_NORMAL | VM_ALLOC_RETRY | 1171 VM_ALLOC_ZERO); 1172 pmap->pm_pdirm = ptdpg; 1173 vm_page_flag_clear(ptdpg, PG_MAPPED); 1174 vm_page_wire(ptdpg); 1175 vm_page_wakeup(ptdpg); 1176 pmap_kenter((vm_offset_t)pmap->pm_pml4, VM_PAGE_TO_PHYS(ptdpg)); 1177 } 1178 pmap->pm_count = 1; 1179 pmap->pm_active = 0; 1180 pmap->pm_ptphint = NULL; 1181 TAILQ_INIT(&pmap->pm_pvlist); 1182 TAILQ_INIT(&pmap->pm_pvlist_free); 1183 spin_init(&pmap->pm_spin); 1184 lwkt_token_init(&pmap->pm_token, "pmap_tok"); 1185 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1186 pmap->pm_stats.resident_count = 1; 1187 } 1188 1189 /* 1190 * Clean up a pmap structure so it can be physically freed. This routine 1191 * is called by the vmspace dtor function. A great deal of pmap data is 1192 * left passively mapped to improve vmspace management so we have a bit 1193 * of cleanup work to do here. 1194 * 1195 * No requirements. 1196 */ 1197 void 1198 pmap_puninit(pmap_t pmap) 1199 { 1200 vm_page_t p; 1201 1202 KKASSERT(pmap->pm_active == 0); 1203 if ((p = pmap->pm_pdirm) != NULL) { 1204 KKASSERT(pmap->pm_pml4 != NULL); 1205 pmap_kremove((vm_offset_t)pmap->pm_pml4); 1206 vm_page_busy_wait(p, FALSE, "pgpun"); 1207 p->wire_count--; 1208 atomic_add_int(&vmstats.v_wire_count, -1); 1209 vm_page_free_zero(p); 1210 pmap->pm_pdirm = NULL; 1211 } 1212 if (pmap->pm_pml4) { 1213 kmem_free(&kernel_map, (vm_offset_t)pmap->pm_pml4, PAGE_SIZE); 1214 pmap->pm_pml4 = NULL; 1215 } 1216 if (pmap->pm_pteobj) { 1217 vm_object_deallocate(pmap->pm_pteobj); 1218 pmap->pm_pteobj = NULL; 1219 } 1220 } 1221 1222 /* 1223 * Wire in kernel global address entries. To avoid a race condition 1224 * between pmap initialization and pmap_growkernel, this procedure 1225 * adds the pmap to the master list (which growkernel scans to update), 1226 * then copies the template. 1227 * 1228 * In a virtual kernel there are no kernel global address entries. 1229 * 1230 * No requirements. 1231 */ 1232 void 1233 pmap_pinit2(struct pmap *pmap) 1234 { 1235 spin_lock(&pmap_spin); 1236 TAILQ_INSERT_TAIL(&pmap_list, pmap, pm_pmnode); 1237 spin_unlock(&pmap_spin); 1238 } 1239 1240 /* 1241 * Attempt to release and free a vm_page in a pmap. Returns 1 on success, 1242 * 0 on failure (if the procedure had to sleep). 1243 * 1244 * When asked to remove the page directory page itself, we actually just 1245 * leave it cached so we do not have to incur the SMP inval overhead of 1246 * removing the kernel mapping. pmap_puninit() will take care of it. 1247 */ 1248 static int 1249 pmap_release_free_page(struct pmap *pmap, vm_page_t p) 1250 { 1251 /* 1252 * This code optimizes the case of freeing non-busy 1253 * page-table pages. Those pages are zero now, and 1254 * might as well be placed directly into the zero queue. 1255 */ 1256 if (vm_page_busy_try(p, FALSE)) { 1257 vm_page_sleep_busy(p, FALSE, "pmaprl"); 1258 return 0; 1259 } 1260 1261 /* 1262 * Remove the page table page from the processes address space. 1263 */ 1264 if (p->pindex == NUPDE + NUPDPE + PML4PML4I) { 1265 /* 1266 * We are the pml4 table itself. 1267 */ 1268 /* XXX anything to do here? */ 1269 } else if (p->pindex >= (NUPDE + NUPDPE)) { 1270 /* 1271 * We are a PDP page. 1272 * We look for the PML4 entry that points to us. 
1273 */ 1274 vm_page_t m4 = vm_page_lookup(pmap->pm_pteobj, NUPDE + NUPDPE + PML4PML4I); 1275 KKASSERT(m4 != NULL); 1276 pml4_entry_t *pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m4)); 1277 int idx = (p->pindex - (NUPDE + NUPDPE)) % NPML4EPG; 1278 KKASSERT(pml4[idx] != 0); 1279 pml4[idx] = 0; 1280 m4->hold_count--; 1281 /* JG What about wire_count? */ 1282 } else if (p->pindex >= NUPDE) { 1283 /* 1284 * We are a PD page. 1285 * We look for the PDP entry that points to us. 1286 */ 1287 vm_page_t m3 = vm_page_lookup(pmap->pm_pteobj, NUPDE + NUPDPE + (p->pindex - NUPDE) / NPDPEPG); 1288 KKASSERT(m3 != NULL); 1289 pdp_entry_t *pdp = (pdp_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m3)); 1290 int idx = (p->pindex - NUPDE) % NPDPEPG; 1291 KKASSERT(pdp[idx] != 0); 1292 pdp[idx] = 0; 1293 m3->hold_count--; 1294 /* JG What about wire_count? */ 1295 } else { 1296 /* We are a PT page. 1297 * We look for the PD entry that points to us. 1298 */ 1299 vm_page_t m2 = vm_page_lookup(pmap->pm_pteobj, NUPDE + p->pindex / NPDEPG); 1300 KKASSERT(m2 != NULL); 1301 pd_entry_t *pd = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m2)); 1302 int idx = p->pindex % NPDEPG; 1303 pd[idx] = 0; 1304 m2->hold_count--; 1305 /* JG What about wire_count? */ 1306 } 1307 KKASSERT(pmap->pm_stats.resident_count > 0); 1308 --pmap->pm_stats.resident_count; 1309 1310 if (p->hold_count) { 1311 panic("pmap_release: freeing held page table page"); 1312 } 1313 if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex)) 1314 pmap->pm_ptphint = NULL; 1315 1316 /* 1317 * We leave the top-level page table page cached, wired, and mapped in 1318 * the pmap until the dtor function (pmap_puninit()) gets called. 1319 * However, still clean it up so we can set PG_ZERO. 1320 */ 1321 if (p->pindex == NUPDE + NUPDPE + PML4PML4I) { 1322 bzero(pmap->pm_pml4, PAGE_SIZE); 1323 vm_page_flag_set(p, PG_ZERO); 1324 vm_page_wakeup(p); 1325 } else { 1326 abort(); 1327 p->wire_count--; 1328 atomic_add_int(&vmstats.v_wire_count, -1); 1329 /* JG eventually revert to using vm_page_free_zero() */ 1330 vm_page_free(p); 1331 } 1332 return 1; 1333 } 1334 1335 /* 1336 * this routine is called if the page table page is not 1337 * mapped correctly. 1338 */ 1339 static vm_page_t 1340 _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex) 1341 { 1342 vm_page_t m, pdppg, pdpg; 1343 1344 /* 1345 * Find or fabricate a new pagetable page. Handle allocation 1346 * races by checking m->valid. 1347 */ 1348 m = vm_page_grab(pmap->pm_pteobj, ptepindex, 1349 VM_ALLOC_NORMAL | VM_ALLOC_ZERO | VM_ALLOC_RETRY); 1350 1351 KASSERT(m->queue == PQ_NONE, 1352 ("_pmap_allocpte: %p->queue != PQ_NONE", m)); 1353 1354 /* 1355 * Increment the hold count for the page we will be returning to 1356 * the caller. 1357 */ 1358 m->hold_count++; 1359 vm_page_wire(m); 1360 1361 /* 1362 * Map the pagetable page into the process address space, if 1363 * it isn't already there. 
1364 */ 1365 ++pmap->pm_stats.resident_count; 1366 1367 if (ptepindex >= (NUPDE + NUPDPE)) { 1368 pml4_entry_t *pml4; 1369 vm_pindex_t pml4index; 1370 1371 /* Wire up a new PDP page */ 1372 pml4index = ptepindex - (NUPDE + NUPDPE); 1373 pml4 = &pmap->pm_pml4[pml4index]; 1374 *pml4 = VM_PAGE_TO_PHYS(m) | 1375 VPTE_RW | VPTE_V | VPTE_U | 1376 VPTE_A | VPTE_M; 1377 } else if (ptepindex >= NUPDE) { 1378 vm_pindex_t pml4index; 1379 vm_pindex_t pdpindex; 1380 pml4_entry_t *pml4; 1381 pdp_entry_t *pdp; 1382 1383 /* Wire up a new PD page */ 1384 pdpindex = ptepindex - NUPDE; 1385 pml4index = pdpindex >> NPML4EPGSHIFT; 1386 1387 pml4 = &pmap->pm_pml4[pml4index]; 1388 if ((*pml4 & VPTE_V) == 0) { 1389 /* Have to allocate a new PDP page, recurse */ 1390 if (_pmap_allocpte(pmap, NUPDE + NUPDPE + pml4index) 1391 == NULL) { 1392 --m->wire_count; 1393 vm_page_free(m); 1394 return (NULL); 1395 } 1396 } else { 1397 /* Add reference to the PDP page */ 1398 pdppg = PHYS_TO_VM_PAGE(*pml4 & VPTE_FRAME); 1399 pdppg->hold_count++; 1400 } 1401 pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & VPTE_FRAME); 1402 1403 /* Now find the pdp page */ 1404 pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; 1405 KKASSERT(*pdp == 0); /* JG DEBUG64 */ 1406 *pdp = VM_PAGE_TO_PHYS(m) | VPTE_RW | VPTE_V | VPTE_U | 1407 VPTE_A | VPTE_M; 1408 } else { 1409 vm_pindex_t pml4index; 1410 vm_pindex_t pdpindex; 1411 pml4_entry_t *pml4; 1412 pdp_entry_t *pdp; 1413 pd_entry_t *pd; 1414 1415 /* Wire up a new PT page */ 1416 pdpindex = ptepindex >> NPDPEPGSHIFT; 1417 pml4index = pdpindex >> NPML4EPGSHIFT; 1418 1419 /* First, find the pdp and check that its valid. */ 1420 pml4 = &pmap->pm_pml4[pml4index]; 1421 if ((*pml4 & VPTE_V) == 0) { 1422 /* We miss a PDP page. We ultimately need a PD page. 1423 * Recursively allocating a PD page will allocate 1424 * the missing PDP page and will also allocate 1425 * the PD page we need. 1426 */ 1427 /* Have to allocate a new PD page, recurse */ 1428 if (_pmap_allocpte(pmap, NUPDE + pdpindex) 1429 == NULL) { 1430 --m->wire_count; 1431 vm_page_free(m); 1432 return (NULL); 1433 } 1434 pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & VPTE_FRAME); 1435 pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; 1436 } else { 1437 pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & VPTE_FRAME); 1438 pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; 1439 if ((*pdp & VPTE_V) == 0) { 1440 /* Have to allocate a new PD page, recurse */ 1441 if (_pmap_allocpte(pmap, NUPDE + pdpindex) 1442 == NULL) { 1443 --m->wire_count; 1444 vm_page_free(m); 1445 return (NULL); 1446 } 1447 } else { 1448 /* Add reference to the PD page */ 1449 pdpg = PHYS_TO_VM_PAGE(*pdp & VPTE_FRAME); 1450 pdpg->hold_count++; 1451 } 1452 } 1453 pd = (pd_entry_t *)PHYS_TO_DMAP(*pdp & VPTE_FRAME); 1454 1455 /* Now we know where the page directory page is */ 1456 pd = &pd[ptepindex & ((1ul << NPDEPGSHIFT) - 1)]; 1457 KKASSERT(*pd == 0); /* JG DEBUG64 */ 1458 *pd = VM_PAGE_TO_PHYS(m) | VPTE_RW | VPTE_V | VPTE_U | 1459 VPTE_A | VPTE_M; 1460 } 1461 1462 /* 1463 * Set the page table hint 1464 */ 1465 pmap->pm_ptphint = m; 1466 vm_page_flag_set(m, PG_MAPPED); 1467 vm_page_wakeup(m); 1468 1469 return m; 1470 } 1471 1472 /* 1473 * Determine the page table page required to access the VA in the pmap 1474 * and allocate it if necessary. Return a held vm_page_t for the page. 1475 * 1476 * Only used with user pmaps. 
1477 */ 1478 static vm_page_t 1479 pmap_allocpte(pmap_t pmap, vm_offset_t va) 1480 { 1481 vm_pindex_t ptepindex; 1482 pd_entry_t *pd; 1483 vm_page_t m; 1484 1485 ASSERT_LWKT_TOKEN_HELD(vm_object_token(pmap->pm_pteobj)); 1486 1487 /* 1488 * Calculate pagetable page index 1489 */ 1490 ptepindex = pmap_pde_pindex(va); 1491 1492 /* 1493 * Get the page directory entry 1494 */ 1495 pd = pmap_pde(pmap, va); 1496 1497 /* 1498 * This supports switching from a 2MB page to a 1499 * normal 4K page. 1500 */ 1501 if (pd != NULL && (*pd & (VPTE_PS | VPTE_V)) == (VPTE_PS | VPTE_V)) { 1502 panic("no promotion/demotion yet"); 1503 *pd = 0; 1504 pd = NULL; 1505 /*cpu_invltlb();*/ 1506 /*smp_invltlb();*/ 1507 } 1508 1509 /* 1510 * If the page table page is mapped, we just increment the 1511 * hold count, and activate it. 1512 */ 1513 if (pd != NULL && (*pd & VPTE_V) != 0) { 1514 /* YYY hint is used here on i386 */ 1515 m = pmap_page_lookup(pmap->pm_pteobj, ptepindex); 1516 pmap->pm_ptphint = m; 1517 vm_page_hold(m); 1518 vm_page_wakeup(m); 1519 return m; 1520 } 1521 /* 1522 * Here if the pte page isn't mapped, or if it has been deallocated. 1523 */ 1524 return _pmap_allocpte(pmap, ptepindex); 1525 } 1526 1527 1528 /*************************************************** 1529 * Pmap allocation/deallocation routines. 1530 ***************************************************/ 1531 1532 /* 1533 * Release any resources held by the given physical map. 1534 * Called when a pmap initialized by pmap_pinit is being released. 1535 * Should only be called if the map contains no valid mappings. 1536 * 1537 * Caller must hold pmap->pm_token 1538 */ 1539 static int pmap_release_callback(struct vm_page *p, void *data); 1540 1541 void 1542 pmap_release(struct pmap *pmap) 1543 { 1544 vm_object_t object = pmap->pm_pteobj; 1545 struct rb_vm_page_scan_info info; 1546 1547 KKASSERT(pmap != &kernel_pmap); 1548 1549 #if defined(DIAGNOSTIC) 1550 if (object->ref_count != 1) 1551 panic("pmap_release: pteobj reference count != 1"); 1552 #endif 1553 1554 info.pmap = pmap; 1555 info.object = object; 1556 1557 spin_lock(&pmap_spin); 1558 TAILQ_REMOVE(&pmap_list, pmap, pm_pmnode); 1559 spin_unlock(&pmap_spin); 1560 1561 vm_object_hold(object); 1562 do { 1563 info.error = 0; 1564 info.mpte = NULL; 1565 info.limit = object->generation; 1566 1567 vm_page_rb_tree_RB_SCAN(&object->rb_memq, NULL, 1568 pmap_release_callback, &info); 1569 if (info.error == 0 && info.mpte) { 1570 if (!pmap_release_free_page(pmap, info.mpte)) 1571 info.error = 1; 1572 } 1573 } while (info.error); 1574 vm_object_drop(object); 1575 } 1576 1577 static int 1578 pmap_release_callback(struct vm_page *p, void *data) 1579 { 1580 struct rb_vm_page_scan_info *info = data; 1581 1582 if (p->pindex == NUPDE + NUPDPE + PML4PML4I) { 1583 info->mpte = p; 1584 return(0); 1585 } 1586 if (!pmap_release_free_page(info->pmap, p)) { 1587 info->error = 1; 1588 return(-1); 1589 } 1590 if (info->object->generation != info->limit) { 1591 info->error = 1; 1592 return(-1); 1593 } 1594 return(0); 1595 } 1596 1597 /* 1598 * Grow the number of kernel page table entries, if needed. 1599 * 1600 * No requirements. 
1601 */ 1602 void 1603 pmap_growkernel(vm_offset_t kstart, vm_offset_t kend) 1604 { 1605 vm_offset_t addr; 1606 vm_paddr_t paddr; 1607 vm_offset_t ptppaddr; 1608 vm_page_t nkpg; 1609 pd_entry_t *pde, newpdir; 1610 pdp_entry_t newpdp; 1611 1612 addr = kend; 1613 1614 vm_object_hold(kptobj); 1615 if (kernel_vm_end == 0) { 1616 kernel_vm_end = KvaStart; 1617 nkpt = 0; 1618 while ((*pmap_pde(&kernel_pmap, kernel_vm_end) & VPTE_V) != 0) { 1619 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1620 nkpt++; 1621 if (kernel_vm_end - 1 >= kernel_map.max_offset) { 1622 kernel_vm_end = kernel_map.max_offset; 1623 break; 1624 } 1625 } 1626 } 1627 addr = roundup2(addr, PAGE_SIZE * NPTEPG); 1628 if (addr - 1 >= kernel_map.max_offset) 1629 addr = kernel_map.max_offset; 1630 while (kernel_vm_end < addr) { 1631 pde = pmap_pde(&kernel_pmap, kernel_vm_end); 1632 if (pde == NULL) { 1633 /* We need a new PDP entry */ 1634 nkpg = vm_page_alloc(kptobj, nkpt, 1635 VM_ALLOC_NORMAL | VM_ALLOC_SYSTEM 1636 | VM_ALLOC_INTERRUPT); 1637 if (nkpg == NULL) { 1638 panic("pmap_growkernel: no memory to " 1639 "grow kernel"); 1640 } 1641 paddr = VM_PAGE_TO_PHYS(nkpg); 1642 if ((nkpg->flags & PG_ZERO) == 0) 1643 pmap_zero_page(paddr); 1644 vm_page_flag_clear(nkpg, PG_ZERO); 1645 newpdp = (pdp_entry_t)(paddr | 1646 VPTE_V | VPTE_RW | VPTE_U | 1647 VPTE_A | VPTE_M); 1648 *pmap_pdpe(&kernel_pmap, kernel_vm_end) = newpdp; 1649 nkpt++; 1650 continue; /* try again */ 1651 } 1652 if ((*pde & VPTE_V) != 0) { 1653 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & 1654 ~(PAGE_SIZE * NPTEPG - 1); 1655 if (kernel_vm_end - 1 >= kernel_map.max_offset) { 1656 kernel_vm_end = kernel_map.max_offset; 1657 break; 1658 } 1659 continue; 1660 } 1661 1662 /* 1663 * This index is bogus, but out of the way 1664 */ 1665 nkpg = vm_page_alloc(kptobj, nkpt, 1666 VM_ALLOC_NORMAL | 1667 VM_ALLOC_SYSTEM | 1668 VM_ALLOC_INTERRUPT); 1669 if (nkpg == NULL) 1670 panic("pmap_growkernel: no memory to grow kernel"); 1671 1672 vm_page_wire(nkpg); 1673 ptppaddr = VM_PAGE_TO_PHYS(nkpg); 1674 pmap_zero_page(ptppaddr); 1675 vm_page_flag_clear(nkpg, PG_ZERO); 1676 newpdir = (pd_entry_t)(ptppaddr | 1677 VPTE_V | VPTE_RW | VPTE_U | 1678 VPTE_A | VPTE_M); 1679 *pmap_pde(&kernel_pmap, kernel_vm_end) = newpdir; 1680 nkpt++; 1681 1682 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & 1683 ~(PAGE_SIZE * NPTEPG - 1); 1684 if (kernel_vm_end - 1 >= kernel_map.max_offset) { 1685 kernel_vm_end = kernel_map.max_offset; 1686 break; 1687 } 1688 } 1689 vm_object_drop(kptobj); 1690 } 1691 1692 /* 1693 * Add a reference to the specified pmap. 1694 * 1695 * No requirements. 1696 */ 1697 void 1698 pmap_reference(pmap_t pmap) 1699 { 1700 if (pmap) { 1701 lwkt_gettoken(&vm_token); 1702 ++pmap->pm_count; 1703 lwkt_reltoken(&vm_token); 1704 } 1705 } 1706 1707 /************************************************************************ 1708 * VMSPACE MANAGEMENT * 1709 ************************************************************************ 1710 * 1711 * The VMSPACE management we do in our virtual kernel must be reflected 1712 * in the real kernel. This is accomplished by making vmspace system 1713 * calls to the real kernel. 
1714 */ 1715 void 1716 cpu_vmspace_alloc(struct vmspace *vm) 1717 { 1718 int r; 1719 void *rp; 1720 vpte_t vpte; 1721 1722 /* 1723 * If VMM enable, don't do nothing, we 1724 * are able to use real page tables 1725 */ 1726 if (vmm_enabled) 1727 return; 1728 1729 #define USER_SIZE (VM_MAX_USER_ADDRESS - VM_MIN_USER_ADDRESS) 1730 1731 if (vmspace_create(&vm->vm_pmap, 0, NULL) < 0) 1732 panic("vmspace_create() failed"); 1733 1734 rp = vmspace_mmap(&vm->vm_pmap, VM_MIN_USER_ADDRESS, USER_SIZE, 1735 PROT_READ|PROT_WRITE, 1736 MAP_FILE|MAP_SHARED|MAP_VPAGETABLE|MAP_FIXED, 1737 MemImageFd, 0); 1738 if (rp == MAP_FAILED) 1739 panic("vmspace_mmap: failed"); 1740 vmspace_mcontrol(&vm->vm_pmap, VM_MIN_USER_ADDRESS, USER_SIZE, 1741 MADV_NOSYNC, 0); 1742 vpte = VM_PAGE_TO_PHYS(vmspace_pmap(vm)->pm_pdirm) | VPTE_RW | VPTE_V | VPTE_U; 1743 r = vmspace_mcontrol(&vm->vm_pmap, VM_MIN_USER_ADDRESS, USER_SIZE, 1744 MADV_SETMAP, vpte); 1745 if (r < 0) 1746 panic("vmspace_mcontrol: failed"); 1747 } 1748 1749 void 1750 cpu_vmspace_free(struct vmspace *vm) 1751 { 1752 /* 1753 * If VMM enable, don't do nothing, we 1754 * are able to use real page tables 1755 */ 1756 if (vmm_enabled) 1757 return; 1758 1759 if (vmspace_destroy(&vm->vm_pmap) < 0) 1760 panic("vmspace_destroy() failed"); 1761 } 1762 1763 /*************************************************** 1764 * page management routines. 1765 ***************************************************/ 1766 1767 /* 1768 * free the pv_entry back to the free list. This function may be 1769 * called from an interrupt. 1770 */ 1771 static __inline void 1772 free_pv_entry(pv_entry_t pv) 1773 { 1774 pv_entry_count--; 1775 KKASSERT(pv_entry_count >= 0); 1776 zfree(pvzone, pv); 1777 } 1778 1779 /* 1780 * get a new pv_entry, allocating a block from the system 1781 * when needed. This function may be called from an interrupt. 1782 */ 1783 static pv_entry_t 1784 get_pv_entry(void) 1785 { 1786 pv_entry_count++; 1787 if (pv_entry_high_water && 1788 (pv_entry_count > pv_entry_high_water) && 1789 (pmap_pagedaemon_waken == 0)) { 1790 pmap_pagedaemon_waken = 1; 1791 wakeup(&vm_pages_needed); 1792 } 1793 return zalloc(pvzone); 1794 } 1795 1796 /* 1797 * This routine is very drastic, but can save the system 1798 * in a pinch. 1799 * 1800 * No requirements. 1801 */ 1802 void 1803 pmap_collect(void) 1804 { 1805 int i; 1806 vm_page_t m; 1807 static int warningdone=0; 1808 1809 if (pmap_pagedaemon_waken == 0) 1810 return; 1811 lwkt_gettoken(&vm_token); 1812 pmap_pagedaemon_waken = 0; 1813 1814 if (warningdone < 5) { 1815 kprintf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n"); 1816 warningdone++; 1817 } 1818 1819 for (i = 0; i < vm_page_array_size; i++) { 1820 m = &vm_page_array[i]; 1821 if (m->wire_count || m->hold_count) 1822 continue; 1823 if (vm_page_busy_try(m, TRUE) == 0) { 1824 if (m->wire_count == 0 && m->hold_count == 0) { 1825 pmap_remove_all(m); 1826 } 1827 vm_page_wakeup(m); 1828 } 1829 } 1830 lwkt_reltoken(&vm_token); 1831 } 1832 1833 1834 /* 1835 * If it is the first entry on the list, it is actually 1836 * in the header and we must copy the following entry up 1837 * to the header. Otherwise we must search the list for 1838 * the entry. In either case we free the now unused entry. 1839 * 1840 * caller must hold vm_token. 
1841 */ 1842 static int 1843 pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va) 1844 { 1845 pv_entry_t pv; 1846 int rtval; 1847 1848 if (m->md.pv_list_count < pmap->pm_stats.resident_count) { 1849 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 1850 if (pmap == pv->pv_pmap && va == pv->pv_va) 1851 break; 1852 } 1853 } else { 1854 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) { 1855 if (va == pv->pv_va) 1856 break; 1857 } 1858 } 1859 1860 /* 1861 * Note that pv_ptem is NULL if the page table page itself is not 1862 * managed, even if the page being removed IS managed. 1863 */ 1864 rtval = 0; 1865 /* JGXXX When can 'pv' be NULL? */ 1866 if (pv) { 1867 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1868 m->md.pv_list_count--; 1869 atomic_add_int(&m->object->agg_pv_list_count, -1); 1870 KKASSERT(m->md.pv_list_count >= 0); 1871 if (TAILQ_EMPTY(&m->md.pv_list)) 1872 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 1873 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 1874 ++pmap->pm_generation; 1875 KKASSERT(pmap->pm_pteobj != NULL); 1876 vm_object_hold(pmap->pm_pteobj); 1877 rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); 1878 vm_object_drop(pmap->pm_pteobj); 1879 free_pv_entry(pv); 1880 } 1881 return rtval; 1882 } 1883 1884 /* 1885 * Create a pv entry for page at pa for (pmap, va). If the page table page 1886 * holding the VA is managed, mpte will be non-NULL. 1887 */ 1888 static void 1889 pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m) 1890 { 1891 pv_entry_t pv; 1892 1893 crit_enter(); 1894 pv = get_pv_entry(); 1895 pv->pv_va = va; 1896 pv->pv_pmap = pmap; 1897 pv->pv_ptem = mpte; 1898 1899 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); 1900 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 1901 m->md.pv_list_count++; 1902 atomic_add_int(&m->object->agg_pv_list_count, 1); 1903 1904 crit_exit(); 1905 } 1906 1907 /* 1908 * pmap_remove_pte: do the things to unmap a page in a process 1909 */ 1910 static int 1911 pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va) 1912 { 1913 pt_entry_t oldpte; 1914 vm_page_t m; 1915 1916 oldpte = pmap_inval_loadandclear(ptq, pmap, va); 1917 if (oldpte & VPTE_WIRED) 1918 --pmap->pm_stats.wired_count; 1919 KKASSERT(pmap->pm_stats.wired_count >= 0); 1920 1921 #if 0 1922 /* 1923 * Machines that don't support invlpg, also don't support 1924 * PG_G. XXX PG_G is disabled for SMP so don't worry about 1925 * the SMP case. 1926 */ 1927 if (oldpte & PG_G) 1928 cpu_invlpg((void *)va); 1929 #endif 1930 KKASSERT(pmap->pm_stats.resident_count > 0); 1931 --pmap->pm_stats.resident_count; 1932 if (oldpte & VPTE_MANAGED) { 1933 m = PHYS_TO_VM_PAGE(oldpte); 1934 if (oldpte & VPTE_M) { 1935 #if defined(PMAP_DIAGNOSTIC) 1936 if (pmap_nw_modified(oldpte)) { 1937 kprintf("pmap_remove: modified page not " 1938 "writable: va: 0x%lx, pte: 0x%lx\n", 1939 va, oldpte); 1940 } 1941 #endif 1942 if (pmap_track_modified(pmap, va)) 1943 vm_page_dirty(m); 1944 } 1945 if (oldpte & VPTE_A) 1946 vm_page_flag_set(m, PG_REFERENCED); 1947 return pmap_remove_entry(pmap, m, va); 1948 } else { 1949 return pmap_unuse_pt(pmap, va, NULL); 1950 } 1951 1952 return 0; 1953 } 1954 1955 /* 1956 * pmap_remove_page: 1957 * 1958 * Remove a single page from a process address space. 1959 * 1960 * This function may not be called from an interrupt if the pmap is 1961 * not kernel_pmap. 
1962 */ 1963 static void 1964 pmap_remove_page(struct pmap *pmap, vm_offset_t va) 1965 { 1966 pt_entry_t *pte; 1967 1968 pte = pmap_pte(pmap, va); 1969 if (pte == NULL) 1970 return; 1971 if ((*pte & VPTE_V) == 0) 1972 return; 1973 pmap_remove_pte(pmap, pte, va); 1974 } 1975 1976 /* 1977 * Remove the given range of addresses from the specified map. 1978 * 1979 * It is assumed that the start and end are properly rounded to 1980 * the page size. 1981 * 1982 * This function may not be called from an interrupt if the pmap is 1983 * not kernel_pmap. 1984 * 1985 * No requirements. 1986 */ 1987 void 1988 pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) 1989 { 1990 vm_offset_t va_next; 1991 pml4_entry_t *pml4e; 1992 pdp_entry_t *pdpe; 1993 pd_entry_t ptpaddr, *pde; 1994 pt_entry_t *pte; 1995 1996 if (pmap == NULL) 1997 return; 1998 1999 vm_object_hold(pmap->pm_pteobj); 2000 lwkt_gettoken(&vm_token); 2001 KKASSERT(pmap->pm_stats.resident_count >= 0); 2002 if (pmap->pm_stats.resident_count == 0) { 2003 lwkt_reltoken(&vm_token); 2004 vm_object_drop(pmap->pm_pteobj); 2005 return; 2006 } 2007 2008 /* 2009 * special handling of removing one page. a very 2010 * common operation and easy to short circuit some 2011 * code. 2012 */ 2013 if (sva + PAGE_SIZE == eva) { 2014 pde = pmap_pde(pmap, sva); 2015 if (pde && (*pde & VPTE_PS) == 0) { 2016 pmap_remove_page(pmap, sva); 2017 lwkt_reltoken(&vm_token); 2018 vm_object_drop(pmap->pm_pteobj); 2019 return; 2020 } 2021 } 2022 2023 for (; sva < eva; sva = va_next) { 2024 pml4e = pmap_pml4e(pmap, sva); 2025 if ((*pml4e & VPTE_V) == 0) { 2026 va_next = (sva + NBPML4) & ~PML4MASK; 2027 if (va_next < sva) 2028 va_next = eva; 2029 continue; 2030 } 2031 2032 pdpe = pmap_pml4e_to_pdpe(pml4e, sva); 2033 if ((*pdpe & VPTE_V) == 0) { 2034 va_next = (sva + NBPDP) & ~PDPMASK; 2035 if (va_next < sva) 2036 va_next = eva; 2037 continue; 2038 } 2039 2040 /* 2041 * Calculate index for next page table. 2042 */ 2043 va_next = (sva + NBPDR) & ~PDRMASK; 2044 if (va_next < sva) 2045 va_next = eva; 2046 2047 pde = pmap_pdpe_to_pde(pdpe, sva); 2048 ptpaddr = *pde; 2049 2050 /* 2051 * Weed out invalid mappings. 2052 */ 2053 if (ptpaddr == 0) 2054 continue; 2055 2056 /* 2057 * Check for large page. 2058 */ 2059 if ((ptpaddr & VPTE_PS) != 0) { 2060 /* JG FreeBSD has more complex treatment here */ 2061 KKASSERT(*pde != 0); 2062 pmap_inval_pde(pde, pmap, sva); 2063 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 2064 continue; 2065 } 2066 2067 /* 2068 * Limit our scan to either the end of the va represented 2069 * by the current page table page, or to the end of the 2070 * range being removed. 2071 */ 2072 if (va_next > eva) 2073 va_next = eva; 2074 2075 /* 2076 * NOTE: pmap_remove_pte() can block. 2077 */ 2078 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, 2079 sva += PAGE_SIZE) { 2080 if (*pte == 0) 2081 continue; 2082 if (pmap_remove_pte(pmap, pte, sva)) 2083 break; 2084 } 2085 } 2086 lwkt_reltoken(&vm_token); 2087 vm_object_drop(pmap->pm_pteobj); 2088 } 2089 2090 /* 2091 * Removes this physical page from all physical maps in which it resides. 2092 * Reflects back modify bits to the pager. 2093 * 2094 * This routine may not be called from an interrupt. 2095 * 2096 * No requirements. 2097 */ 2098 static void 2099 pmap_remove_all(vm_page_t m) 2100 { 2101 pt_entry_t *pte, tpte; 2102 pv_entry_t pv; 2103 2104 #if defined(PMAP_DIAGNOSTIC) 2105 /* 2106 * XXX this makes pmap_page_protect(NONE) illegal for non-managed 2107 * pages! 
2108 */ 2109 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { 2110 panic("pmap_page_protect: illegal for unmanaged page, va: 0x%08llx", (long long)VM_PAGE_TO_PHYS(m)); 2111 } 2112 #endif 2113 2114 lwkt_gettoken(&vm_token); 2115 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 2116 KKASSERT(pv->pv_pmap->pm_stats.resident_count > 0); 2117 --pv->pv_pmap->pm_stats.resident_count; 2118 2119 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2120 KKASSERT(pte != NULL); 2121 2122 tpte = pmap_inval_loadandclear(pte, pv->pv_pmap, pv->pv_va); 2123 if (tpte & VPTE_WIRED) 2124 pv->pv_pmap->pm_stats.wired_count--; 2125 KKASSERT(pv->pv_pmap->pm_stats.wired_count >= 0); 2126 2127 if (tpte & VPTE_A) 2128 vm_page_flag_set(m, PG_REFERENCED); 2129 2130 /* 2131 * Update the vm_page_t clean and reference bits. 2132 */ 2133 if (tpte & VPTE_M) { 2134 #if defined(PMAP_DIAGNOSTIC) 2135 if (pmap_nw_modified(tpte)) { 2136 kprintf( 2137 "pmap_remove_all: modified page not writable: va: 0x%lx, pte: 0x%lx\n", 2138 pv->pv_va, tpte); 2139 } 2140 #endif 2141 if (pmap_track_modified(pv->pv_pmap, pv->pv_va)) 2142 vm_page_dirty(m); 2143 } 2144 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2145 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); 2146 ++pv->pv_pmap->pm_generation; 2147 m->md.pv_list_count--; 2148 atomic_add_int(&m->object->agg_pv_list_count, -1); 2149 KKASSERT(m->md.pv_list_count >= 0); 2150 if (TAILQ_EMPTY(&m->md.pv_list)) 2151 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 2152 vm_object_hold(pv->pv_pmap->pm_pteobj); 2153 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); 2154 vm_object_drop(pv->pv_pmap->pm_pteobj); 2155 free_pv_entry(pv); 2156 } 2157 KKASSERT((m->flags & (PG_MAPPED|PG_WRITEABLE)) == 0); 2158 lwkt_reltoken(&vm_token); 2159 } 2160 2161 /* 2162 * Set the physical protection on the specified range of this map 2163 * as requested. 2164 * 2165 * This function may not be called from an interrupt if the map is 2166 * not the kernel_pmap. 2167 * 2168 * No requirements. 2169 */ 2170 void 2171 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 2172 { 2173 vm_offset_t va_next; 2174 pml4_entry_t *pml4e; 2175 pdp_entry_t *pdpe; 2176 pd_entry_t ptpaddr, *pde; 2177 pt_entry_t *pte; 2178 2179 /* JG review for NX */ 2180 2181 if (pmap == NULL) 2182 return; 2183 2184 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 2185 pmap_remove(pmap, sva, eva); 2186 return; 2187 } 2188 2189 if (prot & VM_PROT_WRITE) 2190 return; 2191 2192 lwkt_gettoken(&vm_token); 2193 2194 for (; sva < eva; sva = va_next) { 2195 2196 pml4e = pmap_pml4e(pmap, sva); 2197 if ((*pml4e & VPTE_V) == 0) { 2198 va_next = (sva + NBPML4) & ~PML4MASK; 2199 if (va_next < sva) 2200 va_next = eva; 2201 continue; 2202 } 2203 2204 pdpe = pmap_pml4e_to_pdpe(pml4e, sva); 2205 if ((*pdpe & VPTE_V) == 0) { 2206 va_next = (sva + NBPDP) & ~PDPMASK; 2207 if (va_next < sva) 2208 va_next = eva; 2209 continue; 2210 } 2211 2212 va_next = (sva + NBPDR) & ~PDRMASK; 2213 if (va_next < sva) 2214 va_next = eva; 2215 2216 pde = pmap_pdpe_to_pde(pdpe, sva); 2217 ptpaddr = *pde; 2218 2219 /* 2220 * Check for large page. 2221 */ 2222 if ((ptpaddr & VPTE_PS) != 0) { 2223 /* JG correct? */ 2224 pmap_clean_pde(pde, pmap, sva); 2225 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 2226 continue; 2227 } 2228 2229 /* 2230 * Weed out invalid mappings. Note: we assume that the page 2231 * directory table is always allocated, and in kernel virtual. 
2232 */ 2233 if (ptpaddr == 0) 2234 continue; 2235 2236 if (va_next > eva) 2237 va_next = eva; 2238 2239 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, 2240 sva += PAGE_SIZE) { 2241 pt_entry_t pbits; 2242 vm_page_t m; 2243 2244 /* 2245 * Clean managed pages and also check the accessed 2246 * bit. Just remove write perms for unmanaged 2247 * pages. Be careful of races, turning off write 2248 * access will force a fault rather then setting 2249 * the modified bit at an unexpected time. 2250 */ 2251 if (*pte & VPTE_MANAGED) { 2252 pbits = pmap_clean_pte(pte, pmap, sva); 2253 m = NULL; 2254 if (pbits & VPTE_A) { 2255 m = PHYS_TO_VM_PAGE(pbits & VPTE_FRAME); 2256 vm_page_flag_set(m, PG_REFERENCED); 2257 atomic_clear_long(pte, VPTE_A); 2258 } 2259 if (pbits & VPTE_M) { 2260 if (pmap_track_modified(pmap, sva)) { 2261 if (m == NULL) 2262 m = PHYS_TO_VM_PAGE(pbits & VPTE_FRAME); 2263 vm_page_dirty(m); 2264 } 2265 } 2266 } else { 2267 pbits = pmap_setro_pte(pte, pmap, sva); 2268 } 2269 } 2270 } 2271 lwkt_reltoken(&vm_token); 2272 } 2273 2274 /* 2275 * Enter a managed page into a pmap. If the page is not wired related pmap 2276 * data can be destroyed at any time for later demand-operation. 2277 * 2278 * Insert the vm_page (m) at virtual address (v) in (pmap), with the 2279 * specified protection, and wire the mapping if requested. 2280 * 2281 * NOTE: This routine may not lazy-evaluate or lose information. The 2282 * page must actually be inserted into the given map NOW. 2283 * 2284 * NOTE: When entering a page at a KVA address, the pmap must be the 2285 * kernel_pmap. 2286 * 2287 * No requirements. 2288 */ 2289 void 2290 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 2291 boolean_t wired, vm_map_entry_t entry __unused) 2292 { 2293 vm_paddr_t pa; 2294 pd_entry_t *pde; 2295 pt_entry_t *pte; 2296 vm_paddr_t opa; 2297 pt_entry_t origpte, newpte; 2298 vm_page_t mpte; 2299 2300 if (pmap == NULL) 2301 return; 2302 2303 va = trunc_page(va); 2304 2305 vm_object_hold(pmap->pm_pteobj); 2306 lwkt_gettoken(&vm_token); 2307 2308 /* 2309 * Get the page table page. The kernel_pmap's page table pages 2310 * are preallocated and have no associated vm_page_t. 2311 */ 2312 if (pmap == &kernel_pmap) 2313 mpte = NULL; 2314 else 2315 mpte = pmap_allocpte(pmap, va); 2316 2317 pde = pmap_pde(pmap, va); 2318 if (pde != NULL && (*pde & VPTE_V) != 0) { 2319 if ((*pde & VPTE_PS) != 0) 2320 panic("pmap_enter: attempted pmap_enter on 2MB page"); 2321 pte = pmap_pde_to_pte(pde, va); 2322 } else { 2323 panic("pmap_enter: invalid page directory va=%#lx", va); 2324 } 2325 2326 KKASSERT(pte != NULL); 2327 /* 2328 * Deal with races on the original mapping (though don't worry 2329 * about VPTE_A races) by cleaning it. This will force a fault 2330 * if an attempt is made to write to the page. 2331 */ 2332 pa = VM_PAGE_TO_PHYS(m); 2333 origpte = pmap_clean_pte(pte, pmap, va); 2334 opa = origpte & VPTE_FRAME; 2335 2336 if (origpte & VPTE_PS) 2337 panic("pmap_enter: attempted pmap_enter on 2MB page"); 2338 2339 /* 2340 * Mapping has not changed, must be protection or wiring change. 2341 */ 2342 if (origpte && (opa == pa)) { 2343 /* 2344 * Wiring change, just update stats. We don't worry about 2345 * wiring PT pages as they remain resident as long as there 2346 * are valid mappings in them. Hence, if a user page is wired, 2347 * the PT page will be also. 
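 *
 * Only pm_stats.wired_count is adjusted here; the VPTE_WIRED bit itself
 * is folded into newpte and written back in the validate step below.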
2348 */ 2349 if (wired && ((origpte & VPTE_WIRED) == 0)) 2350 ++pmap->pm_stats.wired_count; 2351 else if (!wired && (origpte & VPTE_WIRED)) 2352 --pmap->pm_stats.wired_count; 2353 2354 /* 2355 * Remove the extra pte reference. Note that we cannot 2356 * optimize the RO->RW case because we have adjusted the 2357 * wiring count above and may need to adjust the wiring 2358 * bits below. 2359 */ 2360 if (mpte) 2361 mpte->hold_count--; 2362 2363 /* 2364 * We might be turning off write access to the page, 2365 * so we go ahead and sense modify status. 2366 */ 2367 if (origpte & VPTE_MANAGED) { 2368 if ((origpte & VPTE_M) && 2369 pmap_track_modified(pmap, va)) { 2370 vm_page_t om; 2371 om = PHYS_TO_VM_PAGE(opa); 2372 vm_page_dirty(om); 2373 } 2374 pa |= VPTE_MANAGED; 2375 KKASSERT(m->flags & PG_MAPPED); 2376 } 2377 goto validate; 2378 } 2379 /* 2380 * Mapping has changed, invalidate old range and fall through to 2381 * handle validating new mapping. 2382 */ 2383 if (opa) { 2384 int err; 2385 err = pmap_remove_pte(pmap, pte, va); 2386 if (err) 2387 panic("pmap_enter: pte vanished, va: 0x%lx", va); 2388 } 2389 2390 /* 2391 * Enter on the PV list if part of our managed memory. Note that we 2392 * raise IPL while manipulating pv_table since pmap_enter can be 2393 * called at interrupt time. 2394 */ 2395 if (pmap_initialized && 2396 (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { 2397 pmap_insert_entry(pmap, va, mpte, m); 2398 pa |= VPTE_MANAGED; 2399 vm_page_flag_set(m, PG_MAPPED); 2400 } 2401 2402 /* 2403 * Increment counters 2404 */ 2405 ++pmap->pm_stats.resident_count; 2406 if (wired) 2407 pmap->pm_stats.wired_count++; 2408 2409 validate: 2410 /* 2411 * Now validate mapping with desired protection/wiring. 2412 */ 2413 newpte = (pt_entry_t) (pa | pte_prot(pmap, prot) | VPTE_V | VPTE_U); 2414 2415 if (wired) 2416 newpte |= VPTE_WIRED; 2417 // if (pmap != &kernel_pmap) 2418 newpte |= VPTE_U; 2419 2420 /* 2421 * If the mapping or permission bits are different from the 2422 * (now cleaned) original pte, an update is needed. We've 2423 * already downgraded or invalidated the page so all we have 2424 * to do now is update the bits. 2425 * 2426 * XXX should we synchronize RO->RW changes to avoid another 2427 * fault? 2428 */ 2429 if ((origpte & ~(VPTE_RW|VPTE_M|VPTE_A)) != newpte) { 2430 *pte = newpte | VPTE_A; 2431 if (newpte & VPTE_RW) 2432 vm_page_flag_set(m, PG_WRITEABLE); 2433 } 2434 KKASSERT((newpte & VPTE_MANAGED) == 0 || (m->flags & PG_MAPPED)); 2435 lwkt_reltoken(&vm_token); 2436 vm_object_drop(pmap->pm_pteobj); 2437 } 2438 2439 /* 2440 * This code works like pmap_enter() but assumes VM_PROT_READ and not-wired. 2441 * 2442 * Currently this routine may only be used on user pmaps, not kernel_pmap. 2443 * 2444 * No requirements. 2445 */ 2446 void 2447 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m) 2448 { 2449 pt_entry_t *pte; 2450 vm_paddr_t pa; 2451 vm_page_t mpte; 2452 vm_pindex_t ptepindex; 2453 pd_entry_t *ptepa; 2454 2455 KKASSERT(pmap != &kernel_pmap); 2456 2457 KKASSERT(va >= VM_MIN_USER_ADDRESS && va < VM_MAX_USER_ADDRESS); 2458 2459 /* 2460 * Calculate pagetable page index 2461 */ 2462 ptepindex = pmap_pde_pindex(va); 2463 2464 vm_object_hold(pmap->pm_pteobj); 2465 lwkt_gettoken(&vm_token); 2466 2467 do { 2468 /* 2469 * Get the page directory entry 2470 */ 2471 ptepa = pmap_pde(pmap, va); 2472 2473 /* 2474 * If the page table page is mapped, we just increment 2475 * the hold count, and activate it. 
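 *
 * Otherwise a new page table page is allocated via _pmap_allocpte().
 * The enclosing do/while loop simply repeats the lookup until a page
 * table page has been obtained.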
2476 */ 2477 if (ptepa && (*ptepa & VPTE_V) != 0) { 2478 if (*ptepa & VPTE_PS) 2479 panic("pmap_enter_quick: unexpected mapping into 2MB page"); 2480 if (pmap->pm_ptphint && 2481 (pmap->pm_ptphint->pindex == ptepindex)) { 2482 mpte = pmap->pm_ptphint; 2483 } else { 2484 mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex); 2485 pmap->pm_ptphint = mpte; 2486 vm_page_wakeup(mpte); 2487 } 2488 if (mpte) 2489 mpte->hold_count++; 2490 } else { 2491 mpte = _pmap_allocpte(pmap, ptepindex); 2492 } 2493 } while (mpte == NULL); 2494 2495 /* 2496 * Ok, now that the page table page has been validated, get the pte. 2497 * If the pte is already mapped undo mpte's hold_count and 2498 * just return. 2499 */ 2500 pte = pmap_pte(pmap, va); 2501 if (*pte & VPTE_V) { 2502 KKASSERT(mpte != NULL); 2503 pmap_unwire_pte_hold(pmap, va, mpte); 2504 pa = VM_PAGE_TO_PHYS(m); 2505 KKASSERT(((*pte ^ pa) & VPTE_FRAME) == 0); 2506 lwkt_reltoken(&vm_token); 2507 vm_object_drop(pmap->pm_pteobj); 2508 return; 2509 } 2510 2511 /* 2512 * Enter on the PV list if part of our managed memory 2513 */ 2514 if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { 2515 pmap_insert_entry(pmap, va, mpte, m); 2516 vm_page_flag_set(m, PG_MAPPED); 2517 } 2518 2519 /* 2520 * Increment counters 2521 */ 2522 ++pmap->pm_stats.resident_count; 2523 2524 pa = VM_PAGE_TO_PHYS(m); 2525 2526 /* 2527 * Now validate mapping with RO protection 2528 */ 2529 if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) 2530 *pte = (vpte_t)pa | VPTE_V | VPTE_U; 2531 else 2532 *pte = (vpte_t)pa | VPTE_V | VPTE_U | VPTE_MANAGED; 2533 /*pmap_inval_add(&info, pmap, va); shouldn't be needed 0->valid */ 2534 /*pmap_inval_flush(&info); don't need for vkernel */ 2535 lwkt_reltoken(&vm_token); 2536 vm_object_drop(pmap->pm_pteobj); 2537 } 2538 2539 /* 2540 * Make a temporary mapping for a physical address. This is only intended 2541 * to be used for panic dumps. 2542 * 2543 * The caller is responsible for calling smp_invltlb(). 2544 */ 2545 void * 2546 pmap_kenter_temporary(vm_paddr_t pa, long i) 2547 { 2548 pmap_kenter_quick(crashdumpmap + (i * PAGE_SIZE), pa); 2549 return ((void *)crashdumpmap); 2550 } 2551 2552 #define MAX_INIT_PT (96) 2553 2554 /* 2555 * This routine preloads the ptes for a given object into the specified pmap. 2556 * This eliminates the blast of soft faults on process startup and 2557 * immediately after an mmap. 2558 * 2559 * No requirements. 2560 */ 2561 static int pmap_object_init_pt_callback(vm_page_t p, void *data); 2562 2563 void 2564 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot, 2565 vm_object_t object, vm_pindex_t pindex, 2566 vm_size_t size, int limit) 2567 { 2568 struct rb_vm_page_scan_info info; 2569 struct lwp *lp; 2570 vm_size_t psize; 2571 2572 /* 2573 * We can't preinit if read access isn't set or there is no pmap 2574 * or object. 
2575 */ 2576 if ((prot & VM_PROT_READ) == 0 || pmap == NULL || object == NULL) 2577 return; 2578 2579 /* 2580 * We can't preinit if the pmap is not the current pmap 2581 */ 2582 lp = curthread->td_lwp; 2583 if (lp == NULL || pmap != vmspace_pmap(lp->lwp_vmspace)) 2584 return; 2585 2586 psize = x86_64_btop(size); 2587 2588 if ((object->type != OBJT_VNODE) || 2589 ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) && 2590 (object->resident_page_count > MAX_INIT_PT))) { 2591 return; 2592 } 2593 2594 if (psize + pindex > object->size) { 2595 if (object->size < pindex) 2596 return; 2597 psize = object->size - pindex; 2598 } 2599 2600 if (psize == 0) 2601 return; 2602 2603 /* 2604 * Use a red-black scan to traverse the requested range and load 2605 * any valid pages found into the pmap. 2606 * 2607 * We cannot safely scan the object's memq unless we are in a 2608 * critical section since interrupts can remove pages from objects. 2609 */ 2610 info.start_pindex = pindex; 2611 info.end_pindex = pindex + psize - 1; 2612 info.limit = limit; 2613 info.mpte = NULL; 2614 info.addr = addr; 2615 info.pmap = pmap; 2616 2617 vm_object_hold_shared(object); 2618 vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp, 2619 pmap_object_init_pt_callback, &info); 2620 vm_object_drop(object); 2621 } 2622 2623 static 2624 int 2625 pmap_object_init_pt_callback(vm_page_t p, void *data) 2626 { 2627 struct rb_vm_page_scan_info *info = data; 2628 vm_pindex_t rel_index; 2629 /* 2630 * don't allow an madvise to blow away our really 2631 * free pages allocating pv entries. 2632 */ 2633 if ((info->limit & MAP_PREFAULT_MADVISE) && 2634 vmstats.v_free_count < vmstats.v_free_reserved) { 2635 return(-1); 2636 } 2637 2638 /* 2639 * Ignore list markers and ignore pages we cannot instantly 2640 * busy (while holding the object token). 2641 */ 2642 if (p->flags & PG_MARKER) 2643 return 0; 2644 if (vm_page_busy_try(p, TRUE)) 2645 return 0; 2646 if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && 2647 (p->flags & PG_FICTITIOUS) == 0) { 2648 if ((p->queue - p->pc) == PQ_CACHE) 2649 vm_page_deactivate(p); 2650 rel_index = p->pindex - info->start_pindex; 2651 pmap_enter_quick(info->pmap, 2652 info->addr + x86_64_ptob(rel_index), p); 2653 } 2654 vm_page_wakeup(p); 2655 return(0); 2656 } 2657 2658 /* 2659 * Return TRUE if the pmap is in shape to trivially 2660 * pre-fault the specified address. 2661 * 2662 * Returns FALSE if it would be non-trivial or if a 2663 * pte is already loaded into the slot. 2664 * 2665 * No requirements. 2666 */ 2667 int 2668 pmap_prefault_ok(pmap_t pmap, vm_offset_t addr) 2669 { 2670 pt_entry_t *pte; 2671 pd_entry_t *pde; 2672 int ret; 2673 2674 lwkt_gettoken(&vm_token); 2675 pde = pmap_pde(pmap, addr); 2676 if (pde == NULL || *pde == 0) { 2677 ret = 0; 2678 } else { 2679 pte = pmap_pde_to_pte(pde, addr); 2680 ret = (*pte) ? 0 : 1; 2681 } 2682 lwkt_reltoken(&vm_token); 2683 return (ret); 2684 } 2685 2686 /* 2687 * Change the wiring attribute for a map/virtual-address pair. 2688 * 2689 * The mapping must already exist in the pmap. 2690 * No other requirements. 
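 *
 * A minimal usage sketch (hypothetical caller, shown for illustration
 * only; the mapping at va must already have been entered):
 *
 *	pmap_change_wiring(pmap, va, TRUE, entry);	// wire the mapping
 *	...
 *	pmap_change_wiring(pmap, va, FALSE, entry);	// unwire it again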
2691 */ 2692 void 2693 pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired, 2694 vm_map_entry_t entry __unused) 2695 { 2696 pt_entry_t *pte; 2697 2698 if (pmap == NULL) 2699 return; 2700 2701 lwkt_gettoken(&vm_token); 2702 pte = pmap_pte(pmap, va); 2703 2704 if (wired && !pmap_pte_w(pte)) 2705 pmap->pm_stats.wired_count++; 2706 else if (!wired && pmap_pte_w(pte)) 2707 pmap->pm_stats.wired_count--; 2708 2709 /* 2710 * Wiring is not a hardware characteristic so there is no need to 2711 * invalidate TLB. However, in an SMP environment we must use 2712 * a locked bus cycle to update the pte (if we are not using 2713 * the pmap_inval_*() API that is)... it's ok to do this for simple 2714 * wiring changes. 2715 */ 2716 if (wired) 2717 atomic_set_long(pte, VPTE_WIRED); 2718 else 2719 atomic_clear_long(pte, VPTE_WIRED); 2720 lwkt_reltoken(&vm_token); 2721 } 2722 2723 /* 2724 * Copy the range specified by src_addr/len 2725 * from the source map to the range dst_addr/len 2726 * in the destination map. 2727 * 2728 * This routine is only advisory and need not do anything. 2729 */ 2730 void 2731 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, 2732 vm_size_t len, vm_offset_t src_addr) 2733 { 2734 /* 2735 * XXX BUGGY. Among other things srcmpte is assumed to remain 2736 * valid through blocking calls, and that's just not going to 2737 * be the case. 2738 * 2739 * FIXME! 2740 */ 2741 return; 2742 } 2743 2744 /* 2745 * pmap_zero_page: 2746 * 2747 * Zero the specified physical page. 2748 * 2749 * This function may be called from an interrupt and no locking is 2750 * required. 2751 */ 2752 void 2753 pmap_zero_page(vm_paddr_t phys) 2754 { 2755 vm_offset_t va = PHYS_TO_DMAP(phys); 2756 2757 bzero((void *)va, PAGE_SIZE); 2758 } 2759 2760 /* 2761 * pmap_page_assertzero: 2762 * 2763 * Assert that a page is empty, panic if it isn't. 2764 */ 2765 void 2766 pmap_page_assertzero(vm_paddr_t phys) 2767 { 2768 int i; 2769 2770 crit_enter(); 2771 vm_offset_t virt = PHYS_TO_DMAP(phys); 2772 2773 for (i = 0; i < PAGE_SIZE; i += sizeof(int)) { 2774 if (*(int *)((char *)virt + i) != 0) { 2775 panic("pmap_page_assertzero() @ %p not zero!", 2776 (void *)virt); 2777 } 2778 } 2779 crit_exit(); 2780 } 2781 2782 /* 2783 * pmap_zero_page_area: 2784 * 2785 * Zero part of a physical page by mapping it into memory and clearing 2786 * its contents with bzero. 2787 * 2788 * off and size may not cover an area beyond a single hardware page. 2789 */ 2790 void 2791 pmap_zero_page_area(vm_paddr_t phys, int off, int size) 2792 { 2793 crit_enter(); 2794 vm_offset_t virt = PHYS_TO_DMAP(phys); 2795 bzero((char *)virt + off, size); 2796 crit_exit(); 2797 } 2798 2799 /* 2800 * pmap_copy_page: 2801 * 2802 * Copy the physical page from the source PA to the target PA. 2803 * This function may be called from an interrupt. No locking 2804 * is required. 2805 */ 2806 void 2807 pmap_copy_page(vm_paddr_t src, vm_paddr_t dst) 2808 { 2809 vm_offset_t src_virt, dst_virt; 2810 2811 crit_enter(); 2812 src_virt = PHYS_TO_DMAP(src); 2813 dst_virt = PHYS_TO_DMAP(dst); 2814 bcopy((void *)src_virt, (void *)dst_virt, PAGE_SIZE); 2815 crit_exit(); 2816 } 2817 2818 /* 2819 * pmap_copy_page_frag: 2820 * 2821 * Copy part of the physical page from the source PA to the target PA. 2822 * This function may be called from an interrupt. No locking 2823 * is required.
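 *
 * Unlike pmap_copy_page(), only 'bytes' bytes are copied and the
 * low-order bits of both src and dst select the starting offsets
 * within their respective pages.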
2824 */ 2825 void 2826 pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes) 2827 { 2828 vm_offset_t src_virt, dst_virt; 2829 2830 crit_enter(); 2831 src_virt = PHYS_TO_DMAP(src); 2832 dst_virt = PHYS_TO_DMAP(dst); 2833 bcopy((char *)src_virt + (src & PAGE_MASK), 2834 (char *)dst_virt + (dst & PAGE_MASK), 2835 bytes); 2836 crit_exit(); 2837 } 2838 2839 /* 2840 * Returns true if the pmap's pv is one of the first 16 pvs linked to 2841 * from this page. This count may be changed upwards or downwards 2842 * in the future; it is only necessary that true be returned for a small 2843 * subset of pmaps for proper page aging. 2844 * 2845 * No other requirements. 2846 */ 2847 boolean_t 2848 pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 2849 { 2850 pv_entry_t pv; 2851 int loops = 0; 2852 2853 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 2854 return FALSE; 2855 2856 crit_enter(); 2857 lwkt_gettoken(&vm_token); 2858 2859 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2860 if (pv->pv_pmap == pmap) { 2861 lwkt_reltoken(&vm_token); 2862 crit_exit(); 2863 return TRUE; 2864 } 2865 loops++; 2866 if (loops >= 16) 2867 break; 2868 } 2869 lwkt_reltoken(&vm_token); 2870 crit_exit(); 2871 return (FALSE); 2872 } 2873 2874 /* 2875 * Remove all pages from specified address space this aids process 2876 * exit speeds. Also, this code is special cased for current 2877 * process only, but can have the more generic (and slightly slower) 2878 * mode enabled. This is much faster than pmap_remove in the case 2879 * of running down an entire address space. 2880 * 2881 * No other requirements. 2882 */ 2883 void 2884 pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 2885 { 2886 pt_entry_t *pte, tpte; 2887 pv_entry_t pv, npv; 2888 vm_page_t m; 2889 int save_generation; 2890 2891 if (pmap->pm_pteobj) 2892 vm_object_hold(pmap->pm_pteobj); 2893 lwkt_gettoken(&vm_token); 2894 2895 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { 2896 if (pv->pv_va >= eva || pv->pv_va < sva) { 2897 npv = TAILQ_NEXT(pv, pv_plist); 2898 continue; 2899 } 2900 2901 KKASSERT(pmap == pv->pv_pmap); 2902 2903 pte = pmap_pte(pmap, pv->pv_va); 2904 2905 /* 2906 * We cannot remove wired pages from a process' mapping 2907 * at this time 2908 */ 2909 if (*pte & VPTE_WIRED) { 2910 npv = TAILQ_NEXT(pv, pv_plist); 2911 continue; 2912 } 2913 tpte = pmap_inval_loadandclear(pte, pmap, pv->pv_va); 2914 2915 m = PHYS_TO_VM_PAGE(tpte & VPTE_FRAME); 2916 2917 KASSERT(m < &vm_page_array[vm_page_array_size], 2918 ("pmap_remove_pages: bad tpte %lx", tpte)); 2919 2920 KKASSERT(pmap->pm_stats.resident_count > 0); 2921 --pmap->pm_stats.resident_count; 2922 2923 /* 2924 * Update the vm_page_t clean and reference bits. 2925 */ 2926 if (tpte & VPTE_M) { 2927 vm_page_dirty(m); 2928 } 2929 2930 npv = TAILQ_NEXT(pv, pv_plist); 2931 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 2932 save_generation = ++pmap->pm_generation; 2933 2934 m->md.pv_list_count--; 2935 atomic_add_int(&m->object->agg_pv_list_count, -1); 2936 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2937 if (TAILQ_EMPTY(&m->md.pv_list)) 2938 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 2939 2940 pmap_unuse_pt(pmap, pv->pv_va, pv->pv_ptem); 2941 free_pv_entry(pv); 2942 2943 /* 2944 * Restart the scan if we blocked during the unuse or free 2945 * calls and other removals were made. 
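 *
 * pm_generation was sampled (and bumped) before the calls that can
 * block; if it no longer matches, another thread removed entries from
 * the pv list while we slept and our scan position may be stale.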
2946 */ 2947 if (save_generation != pmap->pm_generation) { 2948 kprintf("Warning: pmap_remove_pages race-A avoided\n"); 2949 npv = TAILQ_FIRST(&pmap->pm_pvlist); 2950 } 2951 } 2952 lwkt_reltoken(&vm_token); 2953 if (pmap->pm_pteobj) 2954 vm_object_drop(pmap->pm_pteobj); 2955 } 2956 2957 /* 2958 * pmap_testbit tests bits in active mappings of a VM page. 2959 */ 2960 static boolean_t 2961 pmap_testbit(vm_page_t m, int bit) 2962 { 2963 pv_entry_t pv; 2964 pt_entry_t *pte; 2965 2966 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 2967 return FALSE; 2968 2969 if (TAILQ_FIRST(&m->md.pv_list) == NULL) 2970 return FALSE; 2971 2972 crit_enter(); 2973 2974 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2975 /* 2976 * if the bit being tested is the modified bit, then 2977 * mark clean_map and ptes as never 2978 * modified. 2979 */ 2980 if (bit & (VPTE_A|VPTE_M)) { 2981 if (!pmap_track_modified(pv->pv_pmap, pv->pv_va)) 2982 continue; 2983 } 2984 2985 #if defined(PMAP_DIAGNOSTIC) 2986 if (pv->pv_pmap == NULL) { 2987 kprintf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va); 2988 continue; 2989 } 2990 #endif 2991 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2992 if (*pte & bit) { 2993 crit_exit(); 2994 return TRUE; 2995 } 2996 } 2997 crit_exit(); 2998 return (FALSE); 2999 } 3000 3001 /* 3002 * This routine is used to clear bits in ptes. Certain bits require special 3003 * handling, in particular (on virtual kernels) the VPTE_M (modify) bit. 3004 * 3005 * This routine is only called with certain VPTE_* bit combinations. 3006 */ 3007 static __inline void 3008 pmap_clearbit(vm_page_t m, int bit) 3009 { 3010 pv_entry_t pv; 3011 pt_entry_t *pte; 3012 pt_entry_t pbits; 3013 3014 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 3015 return; 3016 3017 crit_enter(); 3018 3019 /* 3020 * Loop over all current mappings setting/clearing as appropos If 3021 * setting RO do we need to clear the VAC? 3022 */ 3023 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3024 /* 3025 * don't write protect pager mappings 3026 */ 3027 if (bit == VPTE_RW) { 3028 if (!pmap_track_modified(pv->pv_pmap, pv->pv_va)) 3029 continue; 3030 } 3031 3032 #if defined(PMAP_DIAGNOSTIC) 3033 if (pv->pv_pmap == NULL) { 3034 kprintf("Null pmap (cb) at va: 0x%lx\n", pv->pv_va); 3035 continue; 3036 } 3037 #endif 3038 3039 /* 3040 * Careful here. We can use a locked bus instruction to 3041 * clear VPTE_A or VPTE_M safely but we need to synchronize 3042 * with the target cpus when we mess with VPTE_RW. 3043 * 3044 * On virtual kernels we must force a new fault-on-write 3045 * in the real kernel if we clear the Modify bit ourselves, 3046 * otherwise the real kernel will not get a new fault and 3047 * will never set our Modify bit again. 3048 */ 3049 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 3050 if (*pte & bit) { 3051 if (bit == VPTE_RW) { 3052 /* 3053 * We must also clear VPTE_M when clearing 3054 * VPTE_RW 3055 */ 3056 pbits = pmap_clean_pte(pte, pv->pv_pmap, 3057 pv->pv_va); 3058 if (pbits & VPTE_M) 3059 vm_page_dirty(m); 3060 } else if (bit == VPTE_M) { 3061 /* 3062 * We do not have to make the page read-only 3063 * when clearing the Modify bit. The real 3064 * kernel will make the real PTE read-only 3065 * or otherwise detect the write and set 3066 * our VPTE_M again simply by us invalidating 3067 * the real kernel VA for the pmap (as we did 3068 * above). This allows the real kernel to 3069 * handle the write fault without forwarding 3070 * the fault to us. 
3071 */ 3072 atomic_clear_long(pte, VPTE_M); 3073 } else if ((bit & (VPTE_RW|VPTE_M)) == (VPTE_RW|VPTE_M)) { 3074 /* 3075 * We've been asked to clear W & M, I guess 3076 * the caller doesn't want us to update 3077 * the dirty status of the VM page. 3078 */ 3079 pmap_clean_pte(pte, pv->pv_pmap, pv->pv_va); 3080 } else { 3081 /* 3082 * We've been asked to clear bits that do 3083 * not interact with hardware. 3084 */ 3085 atomic_clear_long(pte, bit); 3086 } 3087 } 3088 } 3089 crit_exit(); 3090 } 3091 3092 /* 3093 * Lower the permission for all mappings to a given page. 3094 * 3095 * No other requirements. 3096 */ 3097 void 3098 pmap_page_protect(vm_page_t m, vm_prot_t prot) 3099 { 3100 /* JG NX support? */ 3101 if ((prot & VM_PROT_WRITE) == 0) { 3102 lwkt_gettoken(&vm_token); 3103 if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { 3104 pmap_clearbit(m, VPTE_RW); 3105 vm_page_flag_clear(m, PG_WRITEABLE); 3106 } else { 3107 pmap_remove_all(m); 3108 } 3109 lwkt_reltoken(&vm_token); 3110 } 3111 } 3112 3113 vm_paddr_t 3114 pmap_phys_address(vm_pindex_t ppn) 3115 { 3116 return (x86_64_ptob(ppn)); 3117 } 3118 3119 /* 3120 * Return a count of reference bits for a page, clearing those bits. 3121 * It is not necessary for every reference bit to be cleared, but it 3122 * is necessary that 0 only be returned when there are truly no 3123 * reference bits set. 3124 * 3125 * XXX: The exact number of bits to check and clear is a matter that 3126 * should be tested and standardized at some point in the future for 3127 * optimal aging of shared pages. 3128 * 3129 * No other requirements. 3130 */ 3131 int 3132 pmap_ts_referenced(vm_page_t m) 3133 { 3134 pv_entry_t pv, pvf, pvn; 3135 pt_entry_t *pte; 3136 int rtval = 0; 3137 3138 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 3139 return (rtval); 3140 3141 crit_enter(); 3142 lwkt_gettoken(&vm_token); 3143 3144 if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 3145 3146 pvf = pv; 3147 3148 do { 3149 pvn = TAILQ_NEXT(pv, pv_list); 3150 3151 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 3152 3153 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 3154 3155 if (!pmap_track_modified(pv->pv_pmap, pv->pv_va)) 3156 continue; 3157 3158 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 3159 3160 if (pte && (*pte & VPTE_A)) { 3161 atomic_clear_long(pte, VPTE_A); 3162 rtval++; 3163 if (rtval > 4) { 3164 break; 3165 } 3166 } 3167 } while ((pv = pvn) != NULL && pv != pvf); 3168 } 3169 lwkt_reltoken(&vm_token); 3170 crit_exit(); 3171 3172 return (rtval); 3173 } 3174 3175 /* 3176 * Return whether or not the specified physical page was modified 3177 * in any physical maps. 3178 * 3179 * No other requirements. 3180 */ 3181 boolean_t 3182 pmap_is_modified(vm_page_t m) 3183 { 3184 boolean_t res; 3185 3186 lwkt_gettoken(&vm_token); 3187 res = pmap_testbit(m, VPTE_M); 3188 lwkt_reltoken(&vm_token); 3189 return (res); 3190 } 3191 3192 /* 3193 * Clear the modify bits on the specified physical page. 3194 * 3195 * No other requirements. 3196 */ 3197 void 3198 pmap_clear_modify(vm_page_t m) 3199 { 3200 lwkt_gettoken(&vm_token); 3201 pmap_clearbit(m, VPTE_M); 3202 lwkt_reltoken(&vm_token); 3203 } 3204 3205 /* 3206 * Clear the reference bit on the specified physical page. 3207 * 3208 * No other requirements. 
3209 */ 3210 void 3211 pmap_clear_reference(vm_page_t m) 3212 { 3213 lwkt_gettoken(&vm_token); 3214 pmap_clearbit(m, VPTE_A); 3215 lwkt_reltoken(&vm_token); 3216 } 3217 3218 /* 3219 * Miscellaneous support routines follow 3220 */ 3221 3222 static void 3223 i386_protection_init(void) 3224 { 3225 int *kp, prot; 3226 3227 kp = protection_codes; 3228 for (prot = 0; prot < 8; prot++) { 3229 if (prot & VM_PROT_READ) 3230 *kp |= 0; /* if it is VALID it is readable */ 3231 if (prot & VM_PROT_WRITE) 3232 *kp |= VPTE_RW; 3233 if (prot & VM_PROT_EXECUTE) 3234 *kp |= 0; /* if it is VALID it is executable */ 3235 ++kp; 3236 } 3237 } 3238 3239 /* 3240 * Sets the memory attribute for the specified page. 3241 */ 3242 void 3243 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) 3244 { 3245 /* This is a vkernel, do nothing */ 3246 } 3247 3248 /* 3249 * Change the PAT attribute on an existing kernel memory map. Caller 3250 * must ensure that the virtual memory in question is not accessed 3251 * during the adjustment. 3252 */ 3253 void 3254 pmap_change_attr(vm_offset_t va, vm_size_t count, int mode) 3255 { 3256 /* This is a vkernel, do nothing */ 3257 } 3258 3259 /* 3260 * Perform the pmap work for mincore 3261 * 3262 * No other requirements. 3263 */ 3264 int 3265 pmap_mincore(pmap_t pmap, vm_offset_t addr) 3266 { 3267 pt_entry_t *ptep, pte; 3268 vm_page_t m; 3269 int val = 0; 3270 3271 lwkt_gettoken(&vm_token); 3272 ptep = pmap_pte(pmap, addr); 3273 3274 if (ptep && (pte = *ptep) != 0) { 3275 vm_paddr_t pa; 3276 3277 val = MINCORE_INCORE; 3278 if ((pte & VPTE_MANAGED) == 0) 3279 goto done; 3280 3281 pa = pte & VPTE_FRAME; 3282 3283 m = PHYS_TO_VM_PAGE(pa); 3284 3285 /* 3286 * Modified by us 3287 */ 3288 if (pte & VPTE_M) 3289 val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; 3290 /* 3291 * Modified by someone 3292 */ 3293 else if (m->dirty || pmap_is_modified(m)) 3294 val |= MINCORE_MODIFIED_OTHER; 3295 /* 3296 * Referenced by us 3297 */ 3298 if (pte & VPTE_A) 3299 val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; 3300 3301 /* 3302 * Referenced by someone 3303 */ 3304 else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) { 3305 val |= MINCORE_REFERENCED_OTHER; 3306 vm_page_flag_set(m, PG_REFERENCED); 3307 } 3308 } 3309 done: 3310 lwkt_reltoken(&vm_token); 3311 return val; 3312 } 3313 3314 /* 3315 * Replace p->p_vmspace with a new one. If adjrefs is non-zero the new 3316 * vmspace will be ref'd and the old one will be deref'd. 3317 * 3318 * Caller must hold vmspace->vm_map.token for oldvm and newvm 3319 */ 3320 void 3321 pmap_replacevm(struct proc *p, struct vmspace *newvm, int adjrefs) 3322 { 3323 struct vmspace *oldvm; 3324 struct lwp *lp; 3325 3326 crit_enter(); 3327 oldvm = p->p_vmspace; 3328 if (oldvm != newvm) { 3329 p->p_vmspace = newvm; 3330 KKASSERT(p->p_nthreads == 1); 3331 lp = RB_ROOT(&p->p_lwp_tree); 3332 pmap_setlwpvm(lp, newvm); 3333 if (adjrefs) { 3334 sysref_get(&newvm->vm_sysref); 3335 sysref_put(&oldvm->vm_sysref); 3336 } 3337 } 3338 crit_exit(); 3339 } 3340 3341 /* 3342 * Set the vmspace for a LWP. The vmspace is almost universally set the 3343 * same as the process vmspace, but virtual kernels need to swap out contexts 3344 * on a per-lwp basis.
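 *
 * For the current thread, switching simply sets this cpu's bit in the
 * new pmap's pm_active mask and clears it in the old pmap's mask.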
3345 */ 3346 void 3347 pmap_setlwpvm(struct lwp *lp, struct vmspace *newvm) 3348 { 3349 struct vmspace *oldvm; 3350 struct pmap *pmap; 3351 3352 oldvm = lp->lwp_vmspace; 3353 if (oldvm == newvm) 3354 return; 3355 lp->lwp_vmspace = newvm; 3356 if (curthread->td_lwp != lp) 3357 return; 3358 /* 3359 * NOTE: We don't have to worry about the CPULOCK here because 3360 * the virtual kernel doesn't call this function when VMM 3361 * is enabled (and depends on the host kernel when it isn't). 3362 */ 3363 crit_enter(); 3364 pmap = vmspace_pmap(newvm); 3365 atomic_set_cpumask(&pmap->pm_active, CPUMASK(mycpu->gd_cpuid)); 3366 #if defined(SWTCH_OPTIM_STATS) 3367 tlb_flush_count++; 3368 #endif 3369 pmap = vmspace_pmap(oldvm); 3370 atomic_clear_cpumask(&pmap->pm_active, CPUMASK(mycpu->gd_cpuid)); 3371 crit_exit(); 3372 } 3373 3374 /* 3375 * The swtch code tried to switch in a heavyweight process whose pmap 3376 * is locked by another cpu. We have to wait for the lock to clear before 3377 * the pmap can be used. 3378 */ 3379 void 3380 pmap_interlock_wait (struct vmspace *vm) 3381 { 3382 pmap_t pmap = vmspace_pmap(vm); 3383 3384 while (pmap->pm_active & CPUMASK_LOCK) 3385 pthread_yield(); 3386 } 3387 3388 vm_offset_t 3389 pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) 3390 { 3391 3392 if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { 3393 return addr; 3394 } 3395 3396 addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); 3397 return addr; 3398 } 3399 3400 /* 3401 * Used by kmalloc/kfree, page already exists at va 3402 */ 3403 vm_page_t 3404 pmap_kvtom(vm_offset_t va) 3405 { 3406 vpte_t *ptep; 3407 3408 KKASSERT(va >= KvaStart && va < KvaEnd); 3409 ptep = vtopte(va); 3410 return(PHYS_TO_VM_PAGE(*ptep & PG_FRAME)); 3411 } 3412 3413 void 3414 pmap_object_init(vm_object_t object) 3415 { 3416 /* empty */ 3417 } 3418 3419 void 3420 pmap_object_free(vm_object_t object) 3421 { 3422 /* empty */ 3423 } 3424
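/*
 * The following block is an illustrative sketch only (kept under #if 0 so
 * it is never compiled): a hypothetical helper showing how the
 * reference/modify interfaces above might be combined by page-aging code.
 * example_age_page() is an invented name and is not part of this pmap.
 */
#if 0
static int
example_age_page(vm_page_t m)
{
	int refs;

	refs = pmap_ts_referenced(m);	/* count and clear reference bits */
	if (pmap_is_modified(m))
		vm_page_dirty(m);	/* reflect VPTE_M state to the vm_page */
	pmap_clear_modify(m);		/* start a fresh aging interval */
	return (refs);
}
#endif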