/*
 * Copyright (c) 1991 Regents of the University of California.
 * Copyright (c) 1994 John S. Dyson
 * Copyright (c) 1994 David Greenman
 * Copyright (c) 2003 Peter Wemm
 * Copyright (c) 2005-2008 Alan L. Cox <alc@cs.rice.edu>
 * Copyright (c) 2008, 2009 The DragonFly Project.
 * Copyright (c) 2008, 2009 Jordan Gordeev.
 * Copyright (c) 2011-2012 Matthew Dillon
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Manage physical address maps for x86-64 systems.
 */

#if JG
#include "opt_disable_pse.h"
#include "opt_pmap.h"
#endif
#include "opt_msgbuf.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/msgbuf.h>
#include <sys/vmmeter.h>
#include <sys/mman.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_zone.h>

#include <sys/user.h>
#include <sys/thread2.h>
#include <sys/sysref2.h>
#include <sys/spinlock2.h>
#include <vm/vm_page2.h>

#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <machine/smp.h>
#include <machine_base/apic/apicreg.h>
#include <machine/globaldata.h>
#include <machine/pmap.h>
#include <machine/pmap_inval.h>
#include <machine/inttypes.h>

#include <ddb/ddb.h>

#define PMAP_KEEP_PDIRS
#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC 2000
#endif

#if defined(DIAGNOSTIC)
#define PMAP_DIAGNOSTIC
#endif

#define MINPV 2048

/*
 * pmap debugging will report who owns a pv lock when blocking.
 */
#ifdef PMAP_DEBUG

#define PMAP_DEBUG_DECL		,const char *func, int lineno
#define PMAP_DEBUG_ARGS		, __func__, __LINE__
#define PMAP_DEBUG_COPY		, func, lineno

#define pv_get(pmap, pindex)		_pv_get(pmap, pindex		\
							PMAP_DEBUG_ARGS)
#define pv_lock(pv)			_pv_lock(pv			\
							PMAP_DEBUG_ARGS)
#define pv_hold_try(pv)			_pv_hold_try(pv			\
							PMAP_DEBUG_ARGS)
#define pv_alloc(pmap, pindex, isnewp)	_pv_alloc(pmap, pindex, isnewp	\
							PMAP_DEBUG_ARGS)

#else

#define PMAP_DEBUG_DECL
#define PMAP_DEBUG_ARGS
#define PMAP_DEBUG_COPY

#define pv_get(pmap, pindex)		_pv_get(pmap, pindex)
#define pv_lock(pv)			_pv_lock(pv)
#define pv_hold_try(pv)			_pv_hold_try(pv)
#define pv_alloc(pmap, pindex, isnewp)	_pv_alloc(pmap, pindex, isnewp)

#endif

/*
 * Get PDEs and PTEs for user/kernel address space
 */
#define pdir_pde(m, v)	(m[(vm_offset_t)(v) >> PDRSHIFT])

#define pmap_pde_v(pte)		((*(pd_entry_t *)pte & PG_V) != 0)
#define pmap_pte_w(pte)		((*(pt_entry_t *)pte & PG_W) != 0)
#define pmap_pte_m(pte)		((*(pt_entry_t *)pte & PG_M) != 0)
#define pmap_pte_u(pte)		((*(pt_entry_t *)pte & PG_A) != 0)
#define pmap_pte_v(pte)		((*(pt_entry_t *)pte & PG_V) != 0)

/*
 * Given a map and a machine independent protection code, convert to
 * the equivalent x86 protection code.
 */
#define pte_prot(m, p)		\
	(protection_codes[p & (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)])
static int protection_codes[8];
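/*
 * Illustrative use of pte_prot() (a sketch, not from the original code):
 * the protection_codes[] table is filled in by i386_protection_init()
 * and is indexed directly by the low three VM_PROT_* bits, e.g.
 *
 *	pt_entry_t pbits;
 *
 *	pbits = pte_prot(pmap, VM_PROT_READ | VM_PROT_WRITE);
 *
 * would typically yield PG_RW to be or'd into a new pte.
 */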

struct pmap kernel_pmap;
static TAILQ_HEAD(,pmap)	pmap_list = TAILQ_HEAD_INITIALIZER(pmap_list);

MALLOC_DEFINE(M_OBJPMAP, "objpmap", "pmaps associated with VM objects");

vm_paddr_t avail_start;		/* PA of first available physical page */
vm_paddr_t avail_end;		/* PA of last available physical page */
vm_offset_t virtual2_start;	/* cutout free area prior to kernel start */
vm_offset_t virtual2_end;
vm_offset_t virtual_start;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
vm_offset_t KvaStart;		/* VA start of KVA space */
vm_offset_t KvaEnd;		/* VA end of KVA space (non-inclusive) */
vm_offset_t KvaSize;		/* max size of kernel virtual address space */
static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
static int pgeflag;		/* PG_G or-in */
static int pseflag;		/* PG_PS or-in */

static int ndmpdp;
static vm_paddr_t dmaplimit;
static int nkpt;
vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS;

static uint64_t KPTbase;
static uint64_t KPTphys;
static uint64_t KPDphys;	/* phys addr of kernel level 2 */
static uint64_t KPDbase;	/* phys addr of kernel level 2 @ KERNBASE */
uint64_t KPDPphys;		/* phys addr of kernel level 3 */
uint64_t KPML4phys;		/* phys addr of kernel level 4 */

static uint64_t DMPDphys;	/* phys addr of direct mapped level 2 */
static uint64_t DMPDPphys;	/* phys addr of direct mapped level 3 */

/*
 * Data for the pv entry allocation mechanism
 */
static vm_zone_t pvzone;
static struct vm_zone pvzone_store;
static struct vm_object pvzone_obj;
static int pv_entry_max = 0, pv_entry_high_water = 0;
static int pmap_pagedaemon_waken = 0;
static struct pv_entry *pvinit;

/*
 * All those kernel PT submaps that BSD is so fond of
 */
pt_entry_t *CMAP1 = NULL, *ptmmap;
caddr_t CADDR1 = NULL, ptvmmap = NULL;
static pt_entry_t *msgbufmap;
struct msgbuf *msgbufp = NULL;

/*
 * Crashdump maps.
 */
static pt_entry_t *pt_crashdumpmap;
static caddr_t crashdumpmap;

static int pmap_yield_count = 64;
SYSCTL_INT(_machdep, OID_AUTO, pmap_yield_count, CTLFLAG_RW,
    &pmap_yield_count, 0, "Yield during init_pt/release");
static int pmap_mmu_optimize = 0;
SYSCTL_INT(_machdep, OID_AUTO, pmap_mmu_optimize, CTLFLAG_RW,
    &pmap_mmu_optimize, 0, "Share page table pages when possible");

#define DISABLE_PSE

static void pv_hold(pv_entry_t pv);
static int _pv_hold_try(pv_entry_t pv
				PMAP_DEBUG_DECL);
static void pv_drop(pv_entry_t pv);
static void _pv_lock(pv_entry_t pv
				PMAP_DEBUG_DECL);
static void pv_unlock(pv_entry_t pv);
static pv_entry_t _pv_alloc(pmap_t pmap, vm_pindex_t pindex, int *isnew
				PMAP_DEBUG_DECL);
static pv_entry_t _pv_get(pmap_t pmap, vm_pindex_t pindex
				PMAP_DEBUG_DECL);
static pv_entry_t pv_get_try(pmap_t pmap, vm_pindex_t pindex, int *errorp);
static pv_entry_t pv_find(pmap_t pmap, vm_pindex_t pindex);
static void pv_put(pv_entry_t pv);
static void pv_free(pv_entry_t pv);
static void *pv_pte_lookup(pv_entry_t pv, vm_pindex_t pindex);
static pv_entry_t pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex,
		      pv_entry_t *pvpp);
static pv_entry_t pmap_allocpte_seg(pmap_t pmap, vm_pindex_t ptepindex,
		      pv_entry_t *pvpp, vm_map_entry_t entry, vm_offset_t va);
static void pmap_remove_pv_pte(pv_entry_t pv, pv_entry_t pvp,
		      struct pmap_inval_info *info);
static vm_page_t pmap_remove_pv_page(pv_entry_t pv);
static int pmap_release_pv(pv_entry_t pv, pv_entry_t pvp);

struct pmap_scan_info;
static void pmap_remove_callback(pmap_t pmap, struct pmap_scan_info *info,
		      pv_entry_t pte_pv, pv_entry_t pt_pv, int sharept,
		      vm_offset_t va, pt_entry_t *ptep, void *arg __unused);
static void pmap_protect_callback(pmap_t pmap, struct pmap_scan_info *info,
		      pv_entry_t pte_pv, pv_entry_t pt_pv, int sharept,
		      vm_offset_t va, pt_entry_t *ptep, void *arg __unused);

static void i386_protection_init (void);
static void create_pagetables(vm_paddr_t *firstaddr);
static void pmap_remove_all (vm_page_t m);
static boolean_t pmap_testbit (vm_page_t m, int bit);

static pt_entry_t * pmap_pte_quick (pmap_t pmap, vm_offset_t va);
static vm_offset_t pmap_kmem_choose(vm_offset_t addr);

static unsigned pdir4mb;

static int
pv_entry_compare(pv_entry_t pv1, pv_entry_t pv2)
{
	if (pv1->pv_pindex < pv2->pv_pindex)
		return(-1);
	if (pv1->pv_pindex > pv2->pv_pindex)
		return(1);
	return(0);
}

RB_GENERATE2(pv_entry_rb_tree, pv_entry, pv_entry,
	     pv_entry_compare, vm_pindex_t, pv_pindex);

/*
 * Move the kernel virtual free pointer to the next
 * 2MB.  This is used to help improve performance
 * by using a large (2MB) page for much of the kernel
 * (.text, .data, .bss)
 */
static
vm_offset_t
pmap_kmem_choose(vm_offset_t addr)
{
	vm_offset_t newaddr = addr;

	newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
	return newaddr;
}

/*
 * pmap_pte_quick:
 *
 *	Super fast pmap_pte routine best used when scanning the pv lists.
 *	This eliminates many coarse-grained invltlb calls.  Note that many
 *	of the pv list scans are across different pmaps and it is very
 *	wasteful to do an entire invltlb when checking a single mapping.
 */
static __inline pt_entry_t *pmap_pte(pmap_t pmap, vm_offset_t va);

static
pt_entry_t *
pmap_pte_quick(pmap_t pmap, vm_offset_t va)
{
	return pmap_pte(pmap, va);
}

/*
 * Returns the pindex of a page table entry (representing a terminal page).
 * There are NUPTE_TOTAL page table entries possible (a huge number)
 *
 * x86-64 has a 48-bit address space, where bit 47 is sign-extended out.
 * We want to properly translate negative KVAs.
 */
static __inline
vm_pindex_t
pmap_pte_pindex(vm_offset_t va)
{
	return ((va >> PAGE_SHIFT) & (NUPTE_TOTAL - 1));
}

/*
 * Returns the pindex of a page table.
 */
static __inline
vm_pindex_t
pmap_pt_pindex(vm_offset_t va)
{
	return (NUPTE_TOTAL + ((va >> PDRSHIFT) & (NUPT_TOTAL - 1)));
}

/*
 * Returns the pindex of a page directory.
 */
static __inline
vm_pindex_t
pmap_pd_pindex(vm_offset_t va)
{
	return (NUPTE_TOTAL + NUPT_TOTAL +
		((va >> PDPSHIFT) & (NUPD_TOTAL - 1)));
}

static __inline
vm_pindex_t
pmap_pdp_pindex(vm_offset_t va)
{
	return (NUPTE_TOTAL + NUPT_TOTAL + NUPD_TOTAL +
		((va >> PML4SHIFT) & (NUPDP_TOTAL - 1)));
}

static __inline
vm_pindex_t
pmap_pml4_pindex(void)
{
	return (NUPTE_TOTAL + NUPT_TOTAL + NUPD_TOTAL + NUPDP_TOTAL);
}

/*
 * Return various clipped indexes for a given VA
 *
 * Returns the index of a pte in a page table, representing a terminal
 * page.
 */
static __inline
vm_pindex_t
pmap_pte_index(vm_offset_t va)
{
	return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1));
}

/*
 * Returns the index of a pt in a page directory, representing a page
 * table.
 */
static __inline
vm_pindex_t
pmap_pt_index(vm_offset_t va)
{
	return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
}

/*
 * Returns the index of a pd in a page directory page, representing a page
 * directory.
 */
static __inline
vm_pindex_t
pmap_pd_index(vm_offset_t va)
{
	return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1));
}

/*
 * Returns the index of a pdp in the pml4 table, representing a page
 * directory page.
 */
static __inline
vm_pindex_t
pmap_pdp_index(vm_offset_t va)
{
	return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1));
}

/*
 * Generic procedure to index a pte from a pt, pd, or pdp.
 *
 * NOTE: Normally passed pindex as pmap_xx_index().  pmap_xx_pindex() is NOT
 *	 a page table page index but is instead a PV lookup index.
 */
static
void *
pv_pte_lookup(pv_entry_t pv, vm_pindex_t pindex)
{
	pt_entry_t *pte;

	pte = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pv->pv_m));
	return(&pte[pindex]);
}

/*
 * Return pointer to PDP slot in the PML4
 */
static __inline
pml4_entry_t *
pmap_pdp(pmap_t pmap, vm_offset_t va)
{
	return (&pmap->pm_pml4[pmap_pdp_index(va)]);
}

/*
 * Return pointer to PD slot in the PDP given a pointer to the PDP
 */
static __inline
pdp_entry_t *
pmap_pdp_to_pd(pml4_entry_t pdp_pte, vm_offset_t va)
{
	pdp_entry_t *pd;

	pd = (pdp_entry_t *)PHYS_TO_DMAP(pdp_pte & PG_FRAME);
	return (&pd[pmap_pd_index(va)]);
}
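/*
 * Putting the helpers above and below together, a full software walk
 * from the PML4 down to a pte pointer looks roughly like this
 * (illustrative sketch only; each level must be checked for PG_V before
 * dereferencing, which is what pmap_pd()/pmap_pt()/pmap_pte() do):
 *
 *	pml4_entry_t *pdp = pmap_pdp(pmap, va);
 *	pdp_entry_t *pd = pmap_pdp_to_pd(*pdp, va);
 *	pd_entry_t *pt = pmap_pd_to_pt(*pd, va);
 *	pt_entry_t *pte = pmap_pt_to_pte(*pt, va);
 *
 * Each *_to_* step converts the physical address stored in the parent
 * entry back into a usable pointer via PHYS_TO_DMAP().
 */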
/*
 * Return pointer to PD slot in the PDP.
 */
static __inline
pdp_entry_t *
pmap_pd(pmap_t pmap, vm_offset_t va)
{
	pml4_entry_t *pdp;

	pdp = pmap_pdp(pmap, va);
	if ((*pdp & PG_V) == 0)
		return NULL;
	return (pmap_pdp_to_pd(*pdp, va));
}

/*
 * Return pointer to PT slot in the PD given a pointer to the PD
 */
static __inline
pd_entry_t *
pmap_pd_to_pt(pdp_entry_t pd_pte, vm_offset_t va)
{
	pd_entry_t *pt;

	pt = (pd_entry_t *)PHYS_TO_DMAP(pd_pte & PG_FRAME);
	return (&pt[pmap_pt_index(va)]);
}

/*
 * Return pointer to PT slot in the PD
 *
 * SIMPLE PMAP NOTE: Simple pmaps (embedded in objects) do not have PDPs,
 *		     so we cannot lookup the PD via the PDP.  Instead we
 *		     must look it up via the pmap.
 */
static __inline
pd_entry_t *
pmap_pt(pmap_t pmap, vm_offset_t va)
{
	pdp_entry_t *pd;
	pv_entry_t pv;
	vm_pindex_t pd_pindex;

	if (pmap->pm_flags & PMAP_FLAG_SIMPLE) {
		pd_pindex = pmap_pd_pindex(va);
		spin_lock(&pmap->pm_spin);
		pv = pv_entry_rb_tree_RB_LOOKUP(&pmap->pm_pvroot, pd_pindex);
		spin_unlock(&pmap->pm_spin);
		if (pv == NULL || pv->pv_m == NULL)
			return NULL;
		return (pmap_pd_to_pt(VM_PAGE_TO_PHYS(pv->pv_m), va));
	} else {
		pd = pmap_pd(pmap, va);
		if (pd == NULL || (*pd & PG_V) == 0)
			return NULL;
		return (pmap_pd_to_pt(*pd, va));
	}
}

/*
 * Return pointer to PTE slot in the PT given a pointer to the PT
 */
static __inline
pt_entry_t *
pmap_pt_to_pte(pd_entry_t pt_pte, vm_offset_t va)
{
	pt_entry_t *pte;

	pte = (pt_entry_t *)PHYS_TO_DMAP(pt_pte & PG_FRAME);
	return (&pte[pmap_pte_index(va)]);
}

/*
 * Return pointer to PTE slot in the PT
 */
static __inline
pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *pt;

	pt = pmap_pt(pmap, va);
	if (pt == NULL || (*pt & PG_V) == 0)
		return NULL;
	if ((*pt & PG_PS) != 0)
		return ((pt_entry_t *)pt);
	return (pmap_pt_to_pte(*pt, va));
}

/*
 * Of all the layers (PTE, PT, PD, PDP, PML4) the best one to cache is
 * the PT layer.  This will speed up core pmap operations considerably.
 */
static __inline
void
pv_cache(pv_entry_t pv, vm_pindex_t pindex)
{
	if (pindex >= pmap_pt_pindex(0) && pindex <= pmap_pd_pindex(0))
		pv->pv_pmap->pm_pvhint = pv;
}


/*
 * KVM - return address of PT slot in PD
 */
static __inline
pd_entry_t *
vtopt(vm_offset_t va)
{
	uint64_t mask = ((1ul << (NPDEPGSHIFT + NPDPEPGSHIFT +
				  NPML4EPGSHIFT)) - 1);

	return (PDmap + ((va >> PDRSHIFT) & mask));
}

/*
 * KVM - return address of PTE slot in PT
 */
static __inline
pt_entry_t *
vtopte(vm_offset_t va)
{
	uint64_t mask = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT +
				  NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1);

	return (PTmap + ((va >> PAGE_SHIFT) & mask));
}

static uint64_t
allocpages(vm_paddr_t *firstaddr, long n)
{
	uint64_t ret;

	ret = *firstaddr;
	bzero((void *)ret, n * PAGE_SIZE);
	*firstaddr += n * PAGE_SIZE;
	return (ret);
}
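/*
 * Usage sketch for the recursive map above (illustrative only): because
 * the PML4 maps itself at slot PML4PML4I, every kernel PTE is itself
 * visible as ordinary memory in the PTmap window, so the physical
 * address backing a kernel va can be recovered with a single load:
 *
 *	pt_entry_t pte = *vtopte(va);
 *	vm_paddr_t pa = (pte & PG_FRAME) | (va & PAGE_MASK);
 *
 * pmap_kextract() below implements the general (2MB-page aware) version
 * of this.
 */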
static
void
create_pagetables(vm_paddr_t *firstaddr)
{
	long i;		/* must be 64 bits */
	long nkpt_base;
	long nkpt_phys;
	int j;

	/*
	 * We are running (mostly) V=P at this point
	 *
	 * Calculate NKPT - number of kernel page tables.  We have to
	 * accommodate preallocation of the vm_page_array, dump bitmap,
	 * MSGBUF_SIZE, and other stuff.  Be generous.
	 *
	 * Maxmem is in pages.
	 *
	 * ndmpdp is the number of 1GB pages we wish to map.
	 */
	ndmpdp = (ptoa(Maxmem) + NBPDP - 1) >> PDPSHIFT;
	if (ndmpdp < 4)		/* Minimum 4GB of dirmap */
		ndmpdp = 4;
	KKASSERT(ndmpdp <= NKPDPE * NPDEPG);

	/*
	 * Starting at the beginning of kvm (not KERNBASE).
	 */
	nkpt_phys = (Maxmem * sizeof(struct vm_page) + NBPDR - 1) / NBPDR;
	nkpt_phys += (Maxmem * sizeof(struct pv_entry) + NBPDR - 1) / NBPDR;
	nkpt_phys += ((nkpt + nkpt + 1 + NKPML4E + NKPDPE + NDMPML4E +
		       ndmpdp) + 511) / 512;
	nkpt_phys += 128;

	/*
	 * Starting at KERNBASE - map 2G worth of page table pages.
	 * KERNBASE is offset -2G from the end of kvm.
	 */
	nkpt_base = (NPDPEPG - KPDPI) * NPTEPG;	/* typically 2 x 512 */

	/*
	 * Allocate pages
	 */
	KPTbase = allocpages(firstaddr, nkpt_base);
	KPTphys = allocpages(firstaddr, nkpt_phys);
	KPML4phys = allocpages(firstaddr, 1);
	KPDPphys = allocpages(firstaddr, NKPML4E);
	KPDphys = allocpages(firstaddr, NKPDPE);

	/*
	 * Calculate the page directory base for KERNBASE,
	 * that is where we start populating the page table pages.
	 * Basically this is the end - 2.
	 */
	KPDbase = KPDphys + ((NKPDPE - (NPDPEPG - KPDPI)) << PAGE_SHIFT);

	DMPDPphys = allocpages(firstaddr, NDMPML4E);
	if ((amd_feature & AMDID_PAGE1GB) == 0)
		DMPDphys = allocpages(firstaddr, ndmpdp);
	dmaplimit = (vm_paddr_t)ndmpdp << PDPSHIFT;

	/*
	 * Fill in the underlying page table pages for the area around
	 * KERNBASE.  This remaps low physical memory to KERNBASE.
	 *
	 * Read-only from zero to physfree
	 * XXX not fully used, underneath 2M pages
	 */
	for (i = 0; (i << PAGE_SHIFT) < *firstaddr; i++) {
		((pt_entry_t *)KPTbase)[i] = i << PAGE_SHIFT;
		((pt_entry_t *)KPTbase)[i] |= PG_RW | PG_V | PG_G;
	}

	/*
	 * Now map the initial kernel page tables.  One block of page
	 * tables is placed at the beginning of kernel virtual memory,
	 * and another block is placed at KERNBASE to map the kernel binary,
	 * data, bss, and initial pre-allocations.
	 */
	for (i = 0; i < nkpt_base; i++) {
		((pd_entry_t *)KPDbase)[i] = KPTbase + (i << PAGE_SHIFT);
		((pd_entry_t *)KPDbase)[i] |= PG_RW | PG_V;
	}
	for (i = 0; i < nkpt_phys; i++) {
		((pd_entry_t *)KPDphys)[i] = KPTphys + (i << PAGE_SHIFT);
		((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V;
	}

	/*
	 * Map from zero to end of allocations using 2M pages as an
	 * optimization.  This will bypass some of the KPTBase pages
	 * above in the KERNBASE area.
	 */
	for (i = 0; (i << PDRSHIFT) < *firstaddr; i++) {
		((pd_entry_t *)KPDbase)[i] = i << PDRSHIFT;
		((pd_entry_t *)KPDbase)[i] |= PG_RW | PG_V | PG_PS | PG_G;
	}

	/*
	 * And connect up the PD to the PDP.  The kernel pmap is expected
	 * to pre-populate all of its PDs.  See NKPDPE in vmparam.h.
	 */
	for (i = 0; i < NKPDPE; i++) {
		((pdp_entry_t *)KPDPphys)[NPDPEPG - NKPDPE + i] =
				KPDphys + (i << PAGE_SHIFT);
		((pdp_entry_t *)KPDPphys)[NPDPEPG - NKPDPE + i] |=
				PG_RW | PG_V | PG_U;
	}
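	/*
	 * Worked example of the sizing above (illustrative): on a machine
	 * with 8GB of RAM, ptoa(Maxmem) is 2^33, so ndmpdp is
	 * (2^33 + NBPDP - 1) >> PDPSHIFT = 8 one-gigabyte slots and
	 * dmaplimit becomes 8GB.  A machine with less than 4GB still gets
	 * the 4GB minimum from the clamp above.
	 */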
	/*
	 * Now set up the direct map space using either 2MB or 1GB pages.
	 * Preset PG_M and PG_A because demotion expects it.
	 *
	 * When filling in entries in the PD pages make sure any excess
	 * entries are set to zero as we allocated enough PD pages
	 */
	if ((amd_feature & AMDID_PAGE1GB) == 0) {
		for (i = 0; i < NPDEPG * ndmpdp; i++) {
			((pd_entry_t *)DMPDphys)[i] = i << PDRSHIFT;
			((pd_entry_t *)DMPDphys)[i] |= PG_RW | PG_V | PG_PS |
						       PG_G | PG_M | PG_A;
		}

		/*
		 * And the direct map space's PDP
		 */
		for (i = 0; i < ndmpdp; i++) {
			((pdp_entry_t *)DMPDPphys)[i] = DMPDphys +
							(i << PAGE_SHIFT);
			((pdp_entry_t *)DMPDPphys)[i] |= PG_RW | PG_V | PG_U;
		}
	} else {
		for (i = 0; i < ndmpdp; i++) {
			((pdp_entry_t *)DMPDPphys)[i] =
						(vm_paddr_t)i << PDPSHIFT;
			((pdp_entry_t *)DMPDPphys)[i] |= PG_RW | PG_V | PG_PS |
							 PG_G | PG_M | PG_A;
		}
	}

	/* And recursively map PML4 to itself in order to get PTmap */
	((pdp_entry_t *)KPML4phys)[PML4PML4I] = KPML4phys;
	((pdp_entry_t *)KPML4phys)[PML4PML4I] |= PG_RW | PG_V | PG_U;

	/*
	 * Connect the Direct Map slots up to the PML4
	 */
	for (j = 0; j < NDMPML4E; ++j) {
		((pdp_entry_t *)KPML4phys)[DMPML4I + j] =
				(DMPDPphys + ((vm_paddr_t)j << PML4SHIFT)) |
				PG_RW | PG_V | PG_U;
	}

	/*
	 * Connect the KVA slot up to the PML4
	 */
	((pdp_entry_t *)KPML4phys)[KPML4I] = KPDPphys;
	((pdp_entry_t *)KPML4phys)[KPML4I] |= PG_RW | PG_V | PG_U;
}

/*
 * Bootstrap the system enough to run with virtual memory.
 *
 * On the i386 this is called after mapping has already been enabled
 * and just syncs the pmap module with what has already been done.
 * [We can't call it easily with mapping off since the kernel is not
 * mapped with PA == VA, hence we would have to relocate every address
 * from the linked base (virtual) address "KERNBASE" to the actual
 * (physical) address starting relative to 0]
 */
void
pmap_bootstrap(vm_paddr_t *firstaddr)
{
	vm_offset_t va;
	pt_entry_t *pte;

	KvaStart = VM_MIN_KERNEL_ADDRESS;
	KvaEnd = VM_MAX_KERNEL_ADDRESS;
	KvaSize = KvaEnd - KvaStart;

	avail_start = *firstaddr;

	/*
	 * Create an initial set of page tables to run the kernel in.
	 */
	create_pagetables(firstaddr);

	virtual2_start = KvaStart;
	virtual2_end = PTOV_OFFSET;

	virtual_start = (vm_offset_t) PTOV_OFFSET + *firstaddr;
	virtual_start = pmap_kmem_choose(virtual_start);

	virtual_end = VM_MAX_KERNEL_ADDRESS;

	/* XXX do %cr0 as well */
	load_cr4(rcr4() | CR4_PGE | CR4_PSE);
	load_cr3(KPML4phys);

	/*
	 * Initialize protection array.
	 */
	i386_protection_init();

	/*
	 * The kernel's pmap is statically allocated so we don't have to use
	 * pmap_create, which is unlikely to work correctly at this part of
	 * the boot sequence (XXX and which no longer exists).
	 */
	kernel_pmap.pm_pml4 = (pdp_entry_t *) (PTOV_OFFSET + KPML4phys);
	kernel_pmap.pm_count = 1;
	kernel_pmap.pm_active = (cpumask_t)-1 & ~CPUMASK_LOCK;
	RB_INIT(&kernel_pmap.pm_pvroot);
	spin_init(&kernel_pmap.pm_spin);
	lwkt_token_init(&kernel_pmap.pm_token, "kpmap_tok");

	/*
	 * Reserve some special page table entries/VA space for temporary
	 * mapping of pages.
	 */
#define	SYSMAP(c, p, v, n)	\
	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
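	/*
	 * For example, SYSMAP(caddr_t, CMAP1, CADDR1, 1) below expands to
	 * (illustrative):
	 *
	 *	CADDR1 = (caddr_t)va; va += (1*PAGE_SIZE);
	 *	CMAP1 = pte; pte += 1;
	 *
	 * i.e. it carves one page out of the VA/PTE scan, remembering both
	 * the VA and the PTE that maps it.
	 */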
	va = virtual_start;
	pte = vtopte(va);

	/*
	 * CMAP1/CMAP2 are used for zeroing and copying pages.
	 */
	SYSMAP(caddr_t, CMAP1, CADDR1, 1)

	/*
	 * Crashdump maps.
	 */
	SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS);

	/*
	 * ptvmmap is used for reading arbitrary physical pages via
	 * /dev/mem.
	 */
	SYSMAP(caddr_t, ptmmap, ptvmmap, 1)

	/*
	 * msgbufp is used to map the system message buffer.
	 * XXX msgbufmap is not used.
	 */
	SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
	       atop(round_page(MSGBUF_SIZE)))

	virtual_start = va;

	*CMAP1 = 0;

	/*
	 * PG_G is terribly broken on SMP because we IPI invltlb's in some
	 * cases rather than invlpg.  Actually, I don't even know why it
	 * works under UP because of the self-referential page table
	 * mappings.
	 */
	pgeflag = 0;

	/*
	 * Initialize the 4MB page size flag
	 */
	pseflag = 0;
	/*
	 * The 4MB page version of the initial
	 * kernel page mapping.
	 */
	pdir4mb = 0;

#if !defined(DISABLE_PSE)
	if (cpu_feature & CPUID_PSE) {
		pt_entry_t ptditmp;
		/*
		 * Note that we have enabled PSE mode
		 */
		pseflag = PG_PS;
		ptditmp = *(PTmap + x86_64_btop(KERNBASE));
		ptditmp &= ~(NBPDR - 1);
		ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag;
		pdir4mb = ptditmp;
	}
#endif
	cpu_invltlb();
}

/*
 * Set 4mb pdir for mp startup
 */
void
pmap_set_opt(void)
{
	if (pseflag && (cpu_feature & CPUID_PSE)) {
		load_cr4(rcr4() | CR4_PSE);
		if (pdir4mb && mycpu->gd_cpuid == 0) {	/* only on BSP */
			cpu_invltlb();
		}
	}
}

/*
 * Initialize the pmap module, called by vm_init() to initialize any
 * structures that the pmap system needs to map virtual memory.
 * pmap_init has been enhanced to support discontiguous physical memory
 * in a fairly consistent way.
 */
void
pmap_init(void)
{
	int i;
	int initial_pvs;

	/*
	 * Allocate memory for random pmap data structures.  Includes the
	 * pv_head_table.
	 */

	for (i = 0; i < vm_page_array_size; i++) {
		vm_page_t m;

		m = &vm_page_array[i];
		TAILQ_INIT(&m->md.pv_list);
	}

	/*
	 * init the pv free list
	 */
	initial_pvs = vm_page_array_size;
	if (initial_pvs < MINPV)
		initial_pvs = MINPV;
	pvzone = &pvzone_store;
	pvinit = (void *)kmem_alloc(&kernel_map,
				    initial_pvs * sizeof (struct pv_entry));
	zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry),
		  pvinit, initial_pvs);

	/*
	 * Now it is safe to enable pv_table recording.
	 */
	pmap_initialized = TRUE;
}

/*
 * Initialize the address space (zone) for the pv_entries.  Set a
 * high water mark so that the system can recover from excessive
 * numbers of pv entries.
 */
void
pmap_init2(void)
{
	int shpgperproc = PMAP_SHPGPERPROC;
	int entry_max;

	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
	pv_entry_high_water = 9 * (pv_entry_max / 10);

	/*
	 * Subtract out pages already installed in the zone (hack)
	 */
	entry_max = pv_entry_max - vm_page_array_size;
	if (entry_max <= 0)
		entry_max = 1;

	zinitna(pvzone, &pvzone_obj, NULL, 0, entry_max, ZONE_INTERRUPT, 1);
}
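/*
 * Worked example for pmap_init2() above (illustrative): with the default
 * shpgperproc of 2000, a maxproc of, say, 6000 and one million vm_page's,
 * pv_entry_max comes to 2000 * 6000 + 1048576 entries and the high water
 * mark is 90% of that.  Both inputs can be overridden with the
 * vm.pmap.shpgperproc and vm.pmap.pv_entries tunables.
 */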

/***************************************************
 * Low level helper routines.....
 ***************************************************/

/*
 * this routine defines the region(s) of memory that should
 * not be tested for the modified bit.
 */
static __inline
int
pmap_track_modified(vm_pindex_t pindex)
{
	vm_offset_t va = (vm_offset_t)pindex << PAGE_SHIFT;
	if ((va < clean_sva) || (va >= clean_eva))
		return 1;
	else
		return 0;
}

/*
 * Extract the physical page address associated with the map/VA pair.
 * The page must be wired for this to work reliably.
 *
 * XXX for the moment we're using pv_find() instead of pv_get(), as
 *     callers might be expecting non-blocking operation.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t rtval;
	pv_entry_t pt_pv;
	pt_entry_t *ptep;

	rtval = 0;
	if (va >= VM_MAX_USER_ADDRESS) {
		/*
		 * Kernel page directories might be direct-mapped and
		 * there is typically no PV tracking of pte's
		 */
		pd_entry_t *pt;

		pt = pmap_pt(pmap, va);
		if (pt && (*pt & PG_V)) {
			if (*pt & PG_PS) {
				rtval = *pt & PG_PS_FRAME;
				rtval |= va & PDRMASK;
			} else {
				ptep = pmap_pt_to_pte(*pt, va);
				if (*ptep & PG_V) {
					rtval = *ptep & PG_FRAME;
					rtval |= va & PAGE_MASK;
				}
			}
		}
	} else {
		/*
		 * User pages currently do not direct-map the page directory
		 * and some pages might not use managed PVs.  But all PT's
		 * will have a PV.
		 */
		pt_pv = pv_find(pmap, pmap_pt_pindex(va));
		if (pt_pv) {
			ptep = pv_pte_lookup(pt_pv, pmap_pte_index(va));
			if (*ptep & PG_V) {
				rtval = *ptep & PG_FRAME;
				rtval |= va & PAGE_MASK;
			}
			pv_drop(pt_pv);
		}
	}
	return rtval;
}

/*
 * Extract the physical page address associated with the given kernel
 * virtual address.
 */
vm_paddr_t
pmap_kextract(vm_offset_t va)
{
	pd_entry_t pt;		/* pt entry in pd */
	vm_paddr_t pa;

	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
		pa = DMAP_TO_PHYS(va);
	} else {
		pt = *vtopt(va);
		if (pt & PG_PS) {
			pa = (pt & PG_PS_FRAME) | (va & PDRMASK);
		} else {
			/*
			 * Beware of a concurrent promotion that changes the
			 * PDE at this point!  For example, vtopte() must not
			 * be used to access the PTE because it would use the
			 * new PDE.  It is, however, safe to use the old PDE
			 * because the page table page is preserved by the
			 * promotion.
			 */
			pa = *pmap_pt_to_pte(pt, va);
			pa = (pa & PG_FRAME) | (va & PAGE_MASK);
		}
	}
	return pa;
}
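/*
 * Illustrative identity (a sketch, not part of the original code): for
 * any managed physical address pa below dmaplimit the direct map gives
 *
 *	pmap_kextract(PHYS_TO_DMAP(pa)) == pa
 *
 * without touching the page tables at all; only true KVA falls through
 * to the vtopt()/pmap_pt_to_pte() walk above.
 */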

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

/*
 * Routine: pmap_kenter
 * Function:
 *	Add a wired page to the KVA.  Note that for the mapping to take
 *	effect the caller should issue an invltlb after calling
 *	pmap_kenter().
 */
void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{
	pt_entry_t *pte;
	pt_entry_t npte;
	pmap_inval_info info;

	pmap_inval_init(&info);				/* XXX remove */
	npte = pa | PG_RW | PG_V | pgeflag;
	pte = vtopte(va);
	pmap_inval_interlock(&info, &kernel_pmap, va);	/* XXX remove */
	*pte = npte;
	pmap_inval_deinterlock(&info, &kernel_pmap);	/* XXX remove */
	pmap_inval_done(&info);				/* XXX remove */
}

/*
 * Routine: pmap_kenter_quick
 * Function:
 *	Similar to pmap_kenter(), except we only invalidate the
 *	mapping on the current CPU.
 */
void
pmap_kenter_quick(vm_offset_t va, vm_paddr_t pa)
{
	pt_entry_t *pte;
	pt_entry_t npte;

	npte = pa | PG_RW | PG_V | pgeflag;
	pte = vtopte(va);
	*pte = npte;
	cpu_invlpg((void *)va);
}

void
pmap_kenter_sync(vm_offset_t va)
{
	pmap_inval_info info;

	pmap_inval_init(&info);
	pmap_inval_interlock(&info, &kernel_pmap, va);
	pmap_inval_deinterlock(&info, &kernel_pmap);
	pmap_inval_done(&info);
}

void
pmap_kenter_sync_quick(vm_offset_t va)
{
	cpu_invlpg((void *)va);
}

/*
 * remove a page from the kernel pagetables
 */
void
pmap_kremove(vm_offset_t va)
{
	pt_entry_t *pte;
	pmap_inval_info info;

	pmap_inval_init(&info);
	pte = vtopte(va);
	pmap_inval_interlock(&info, &kernel_pmap, va);
	(void)pte_load_clear(pte);
	pmap_inval_deinterlock(&info, &kernel_pmap);
	pmap_inval_done(&info);
}

void
pmap_kremove_quick(vm_offset_t va)
{
	pt_entry_t *pte;

	pte = vtopte(va);
	(void)pte_load_clear(pte);
	cpu_invlpg((void *)va);
}

/*
 * XXX these need to be recoded.  They are not used in any critical path.
 */
void
pmap_kmodify_rw(vm_offset_t va)
{
	atomic_set_long(vtopte(va), PG_RW);
	cpu_invlpg((void *)va);
}

void
pmap_kmodify_nc(vm_offset_t va)
{
	atomic_set_long(vtopte(va), PG_N);
	cpu_invlpg((void *)va);
}

/*
 * Used to map a range of physical addresses into kernel virtual
 * address space during the low level boot, typically to map the
 * dump bitmap, message buffer, and vm_page_array.
 *
 * These mappings are typically made at some point after the end of the
 * kernel text+data.
 *
 * We could return PHYS_TO_DMAP(start) here and not allocate any
 * via (*virtp), but then kmem from userland and kernel dumps won't
 * have access to the related pointers.
 */
vm_offset_t
pmap_map(vm_offset_t *virtp, vm_paddr_t start, vm_paddr_t end, int prot)
{
	vm_offset_t va;
	vm_offset_t va_start;

	/*return PHYS_TO_DMAP(start);*/

	va_start = *virtp;
	va = va_start;

	while (start < end) {
		pmap_kenter_quick(va, start);
		va += PAGE_SIZE;
		start += PAGE_SIZE;
	}
	*virtp = va;
	return va_start;
}
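/*
 * Typical boot-time usage sketch for pmap_map() (illustrative): mapping
 * a physically contiguous range while advancing the caller's VA cursor:
 *
 *	vm_offset_t va = pmap_map(&virtual_start, pa, pa + size,
 *				  VM_PROT_READ | VM_PROT_WRITE);
 *
 * The cursor is advanced past the mapped range and the starting VA is
 * returned.
 */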

/*
 * Add a list of wired pages to the kva.  This routine is only used for
 * temporary kernel mappings that do not need to have page modification
 * or references recorded.  Note that old mappings are simply written
 * over.  The page *must* be wired.
 */
void
pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
{
	vm_offset_t end_va;

	end_va = va + count * PAGE_SIZE;

	while (va < end_va) {
		pt_entry_t *pte;

		pte = vtopte(va);
		*pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag;
		cpu_invlpg((void *)va);
		va += PAGE_SIZE;
		m++;
	}
	smp_invltlb();
}

/*
 * This routine jerks page mappings from the
 * kernel -- it is meant only for temporary mappings.
 *
 * MPSAFE, INTERRUPT SAFE (cluster callback)
 */
void
pmap_qremove(vm_offset_t va, int count)
{
	vm_offset_t end_va;

	end_va = va + count * PAGE_SIZE;

	while (va < end_va) {
		pt_entry_t *pte;

		pte = vtopte(va);
		(void)pte_load_clear(pte);
		cpu_invlpg((void *)va);
		va += PAGE_SIZE;
	}
	smp_invltlb();
}

/*
 * Create a new thread and optionally associate it with a (new) process.
 * NOTE! the new thread's cpu may not equal the current cpu.
 */
void
pmap_init_thread(thread_t td)
{
	/* enforce pcb placement & alignment */
	td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_size) - 1;
	td->td_pcb = (struct pcb *)((intptr_t)td->td_pcb & ~(intptr_t)0xF);
	td->td_savefpu = &td->td_pcb->pcb_save;
	td->td_sp = (char *)td->td_pcb;	/* no -16 */
}

/*
 * This routine directly affects the fork perf for a process.
 */
void
pmap_init_proc(struct proc *p)
{
}

/*
 * Initialize pmap0/vmspace0.  This pmap is not added to pmap_list because
 * it, and IdlePTD, represents the template used to update all other pmaps.
 *
 * On architectures where the kernel pmap is not integrated into the user
 * process pmap, this pmap represents the process pmap, not the kernel pmap.
 * The kernel_pmap variable should be used to access the kernel pmap
 * directly.
 */
void
pmap_pinit0(struct pmap *pmap)
{
	pmap->pm_pml4 = (pml4_entry_t *)(PTOV_OFFSET + KPML4phys);
	pmap->pm_count = 1;
	pmap->pm_active = 0;
	pmap->pm_pvhint = NULL;
	RB_INIT(&pmap->pm_pvroot);
	spin_init(&pmap->pm_spin);
	lwkt_token_init(&pmap->pm_token, "pmap_tok");
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
}

/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
static void
pmap_pinit_simple(struct pmap *pmap)
{
	/*
	 * Misc initialization
	 */
	pmap->pm_count = 1;
	pmap->pm_active = 0;
	pmap->pm_pvhint = NULL;
	pmap->pm_flags = PMAP_FLAG_SIMPLE;

	/*
	 * Don't blow up locks/tokens on re-use (XXX fix/use drop code
	 * for this).
	 */
	if (pmap->pm_pmlpv == NULL) {
		RB_INIT(&pmap->pm_pvroot);
		bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
		spin_init(&pmap->pm_spin);
		lwkt_token_init(&pmap->pm_token, "pmap_tok");
	}
}

void
pmap_pinit(struct pmap *pmap)
{
	pv_entry_t pv;
	int j;

	pmap_pinit_simple(pmap);
	pmap->pm_flags &= ~PMAP_FLAG_SIMPLE;

	/*
	 * No need to allocate page table space yet but we do need a valid
	 * page directory table.
	 */
	if (pmap->pm_pml4 == NULL) {
		pmap->pm_pml4 =
		    (pml4_entry_t *)kmem_alloc_pageable(&kernel_map,
							PAGE_SIZE);
	}

	/*
	 * Allocate the page directory page, which wires it even though
	 * it isn't being entered into some higher level page table (it
	 * being the highest level).  If one is already cached we don't
	 * have to do anything.
	 */
	if ((pv = pmap->pm_pmlpv) == NULL) {
		pv = pmap_allocpte(pmap, pmap_pml4_pindex(), NULL);
		pmap->pm_pmlpv = pv;
		pmap_kenter((vm_offset_t)pmap->pm_pml4,
			    VM_PAGE_TO_PHYS(pv->pv_m));
		pv_put(pv);

		/*
		 * Install DMAP and KMAP.
		 */
		for (j = 0; j < NDMPML4E; ++j) {
			pmap->pm_pml4[DMPML4I + j] =
				(DMPDPphys + ((vm_paddr_t)j << PML4SHIFT)) |
				PG_RW | PG_V | PG_U;
		}
		pmap->pm_pml4[KPML4I] = KPDPphys | PG_RW | PG_V | PG_U;

		/*
		 * install self-referential address mapping entry
		 */
		pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pv->pv_m) |
					   PG_V | PG_RW | PG_A | PG_M;
	} else {
		KKASSERT(pv->pv_m->flags & PG_MAPPED);
		KKASSERT(pv->pv_m->flags & PG_WRITEABLE);
	}
	KKASSERT(pmap->pm_pml4[255] == 0);
	KKASSERT(RB_ROOT(&pmap->pm_pvroot) == pv);
	KKASSERT(pv->pv_entry.rbe_left == NULL);
	KKASSERT(pv->pv_entry.rbe_right == NULL);
}

/*
 * Clean up a pmap structure so it can be physically freed.  This routine
 * is called by the vmspace dtor function.  A great deal of pmap data is
 * left passively mapped to improve vmspace management so we have a bit
 * of cleanup work to do here.
 */
void
pmap_puninit(pmap_t pmap)
{
	pv_entry_t pv;
	vm_page_t p;

	KKASSERT(pmap->pm_active == 0);
	if ((pv = pmap->pm_pmlpv) != NULL) {
		if (pv_hold_try(pv) == 0)
			pv_lock(pv);
		p = pmap_remove_pv_page(pv);
		pv_free(pv);
		pmap_kremove((vm_offset_t)pmap->pm_pml4);
		vm_page_busy_wait(p, FALSE, "pgpun");
		KKASSERT(p->flags & (PG_FICTITIOUS|PG_UNMANAGED));
		vm_page_unwire(p, 0);
		vm_page_flag_clear(p, PG_MAPPED | PG_WRITEABLE);

		/*
		 * XXX eventually clean out PML4 static entries and
		 * use vm_page_free_zero()
		 */
		vm_page_free(p);
		pmap->pm_pmlpv = NULL;
	}
	if (pmap->pm_pml4) {
		KKASSERT(pmap->pm_pml4 != (void *)(PTOV_OFFSET + KPML4phys));
		kmem_free(&kernel_map, (vm_offset_t)pmap->pm_pml4, PAGE_SIZE);
		pmap->pm_pml4 = NULL;
	}
	KKASSERT(pmap->pm_stats.resident_count == 0);
	KKASSERT(pmap->pm_stats.wired_count == 0);
}

/*
 * Wire in kernel global address entries.  To avoid a race condition
 * between pmap initialization and pmap_growkernel, this procedure
 * adds the pmap to the master list (which growkernel scans to update),
 * then copies the template.
 */
void
pmap_pinit2(struct pmap *pmap)
{
	spin_lock(&pmap_spin);
	TAILQ_INSERT_TAIL(&pmap_list, pmap, pm_pmnode);
	spin_unlock(&pmap_spin);
}

/*
 * This routine is called when various levels in the page table need to
 * be populated.  This routine cannot fail.
 *
 * This function returns two locked pv_entry's, one representing the
 * requested pv and one representing the requested pv's parent pv.  If
 * the pv did not previously exist it will be mapped into its parent
 * and wired, otherwise no additional wire count will be added.
 */
static
pv_entry_t
pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, pv_entry_t *pvpp)
{
	pt_entry_t *ptep;
	pv_entry_t pv;
	pv_entry_t pvp;
	vm_pindex_t pt_pindex;
	vm_page_t m;
	int isnew;
	int ispt;

	/*
	 * If the pv already exists and we aren't being asked for the
	 * parent page table page we can just return it.  A locked+held pv
	 * is returned.
	 */
	ispt = 0;
	pv = pv_alloc(pmap, ptepindex, &isnew);
	if (isnew == 0 && pvpp == NULL)
		return(pv);

	/*
	 * This is a new PV, we have to resolve its parent page table and
	 * add an additional wiring to the page if necessary.
	 */

	/*
	 * Special case terminal PVs.  These are not page table pages so
	 * no vm_page is allocated (the caller supplied the vm_page).  If
	 * pvpp is non-NULL we are being asked to also resolve the pt_pv
	 * for this pv.
	 *
	 * Note that pt_pv's are only returned for user VAs.  We assert that
	 * a pt_pv is not being requested for kernel VAs.
	 */
	if (ptepindex < pmap_pt_pindex(0)) {
		if (ptepindex >= NUPTE_USER)
			KKASSERT(pvpp == NULL);
		else
			KKASSERT(pvpp != NULL);
		if (pvpp) {
			pt_pindex = NUPTE_TOTAL + (ptepindex >> NPTEPGSHIFT);
			pvp = pmap_allocpte(pmap, pt_pindex, NULL);
			if (isnew)
				vm_page_wire_quick(pvp->pv_m);
			*pvpp = pvp;
		} else {
			pvp = NULL;
		}
		return(pv);
	}

	/*
	 * Non-terminal PVs allocate a VM page to represent the page table,
	 * so we have to resolve pvp and calculate ptepindex for the pvp
	 * and then for the page table entry index in the pvp for
	 * fall-through.
	 */
	if (ptepindex < pmap_pd_pindex(0)) {
		/*
		 * pv is PT, pvp is PD
		 */
		ptepindex = (ptepindex - pmap_pt_pindex(0)) >> NPDEPGSHIFT;
		ptepindex += NUPTE_TOTAL + NUPT_TOTAL;
		pvp = pmap_allocpte(pmap, ptepindex, NULL);
		if (!isnew)
			goto notnew;

		/*
		 * PT index in PD
		 */
		ptepindex = pv->pv_pindex - pmap_pt_pindex(0);
		ptepindex &= ((1ul << NPDEPGSHIFT) - 1);
		ispt = 1;
	} else if (ptepindex < pmap_pdp_pindex(0)) {
		/*
		 * pv is PD, pvp is PDP
		 *
		 * SIMPLE PMAP NOTE: Simple pmaps do not allocate above
		 *		     the PD.
		 */
		ptepindex = (ptepindex - pmap_pd_pindex(0)) >> NPDPEPGSHIFT;
		ptepindex += NUPTE_TOTAL + NUPT_TOTAL + NUPD_TOTAL;

		if (pmap->pm_flags & PMAP_FLAG_SIMPLE) {
			KKASSERT(pvpp == NULL);
			pvp = NULL;
		} else {
			pvp = pmap_allocpte(pmap, ptepindex, NULL);
		}
		if (!isnew)
			goto notnew;

		/*
		 * PD index in PDP
		 */
		ptepindex = pv->pv_pindex - pmap_pd_pindex(0);
		ptepindex &= ((1ul << NPDPEPGSHIFT) - 1);
	} else if (ptepindex < pmap_pml4_pindex()) {
		/*
		 * pv is PDP, pvp is the root pml4 table
		 */
		pvp = pmap_allocpte(pmap, pmap_pml4_pindex(), NULL);
		if (!isnew)
			goto notnew;

		/*
		 * PDP index in PML4
		 */
		ptepindex = pv->pv_pindex - pmap_pdp_pindex(0);
		ptepindex &= ((1ul << NPML4EPGSHIFT) - 1);
	} else {
		/*
		 * pv represents the top-level PML4, there is no parent.
		 */
		pvp = NULL;
		if (!isnew)
			goto notnew;
	}
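	/*
	 * To illustrate the recursion resolved above (a sketch): allocating
	 * the pv for a new user terminal page recursively creates its PT
	 * pv, which recurses for the PD pv, then the PDP pv, and finally
	 * the PML4 pv, each new level wiring itself into its parent exactly
	 * once.  Any level that already exists (isnew == 0) short-circuits
	 * through the notnew: label below.
	 */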

	/*
	 * This code is only reached if isnew is TRUE and this is not a
	 * terminal PV.  We need to allocate a vm_page for the page table
	 * at this level and enter it into the parent page table.
	 *
	 * page table pages are marked PG_WRITEABLE and PG_MAPPED.
	 */
	for (;;) {
		m = vm_page_alloc(NULL, pv->pv_pindex,
				  VM_ALLOC_NORMAL | VM_ALLOC_SYSTEM |
				  VM_ALLOC_INTERRUPT);
		if (m)
			break;
		vm_wait(0);
	}
	vm_page_spin_lock(m);
	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
	pv->pv_m = m;
	vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
	vm_page_spin_unlock(m);
	vm_page_unmanage(m);	/* m must be spinunlocked */

	if ((m->flags & PG_ZERO) == 0) {
		pmap_zero_page(VM_PAGE_TO_PHYS(m));
	}
#ifdef PMAP_DEBUG
	else {
		pmap_page_assertzero(VM_PAGE_TO_PHYS(m));
	}
#endif
	m->valid = VM_PAGE_BITS_ALL;
	vm_page_flag_clear(m, PG_ZERO);
	vm_page_wire(m);	/* wire for mapping in parent */

	/*
	 * Wire the page into pvp, bump the wire-count for pvp's page table
	 * page.  Bump the resident_count for the pmap.  There is no pvp
	 * for the top level, address the pm_pml4[] array directly.
	 *
	 * If the caller wants the parent we return it, otherwise
	 * we just put it away.
	 *
	 * No interlock is needed for pte 0 -> non-zero.
	 *
	 * In the situation where *ptep is valid we might have an unmanaged
	 * page table page shared from another page table which we need to
	 * unshare before installing our private page table page.
	 */
	if (pvp) {
		ptep = pv_pte_lookup(pvp, ptepindex);
		if (*ptep & PG_V) {
			pt_entry_t pte;
			pmap_inval_info info;

			if (ispt == 0) {
				panic("pmap_allocpte: unexpected pte %p/%d",
				      pvp, (int)ptepindex);
			}
			pmap_inval_init(&info);
			pmap_inval_interlock(&info, pmap, (vm_offset_t)-1);
			pte = pte_load_clear(ptep);
			pmap_inval_deinterlock(&info, pmap);
			pmap_inval_done(&info);
			if (vm_page_unwire_quick(
					PHYS_TO_VM_PAGE(pte & PG_FRAME))) {
				panic("pmap_allocpte: shared pgtable "
				      "pg bad wirecount");
			}
			atomic_add_long(&pmap->pm_stats.resident_count, -1);
		} else {
			vm_page_wire_quick(pvp->pv_m);
		}
		*ptep = VM_PAGE_TO_PHYS(m) | (PG_U | PG_RW | PG_V |
					      PG_A | PG_M);
	}
	vm_page_wakeup(m);
notnew:
	if (pvpp)
		*pvpp = pvp;
	else if (pvp)
		pv_put(pvp);
	return (pv);
}

/*
 * This version of pmap_allocpte() checks for possible segment optimizations
 * that would allow page-table sharing.  It can be called for terminal
 * page or page table page ptepindex's.
 *
 * The function is called with page table page ptepindex's for fictitious
 * and unmanaged terminal pages.  That is, we don't want to allocate a
 * terminal pv, we just want the pt_pv.  pvpp is usually passed as NULL
 * for this case.
 *
 * This function can return a pv and *pvpp associated with the passed-in
 * pmap OR a pv and *pvpp associated with the shared pmap.  In the latter
 * case an unmanaged page table page will be entered into the passed-in
 * pmap.
 */
static
pv_entry_t
pmap_allocpte_seg(pmap_t pmap, vm_pindex_t ptepindex, pv_entry_t *pvpp,
		  vm_map_entry_t entry, vm_offset_t va)
{
	struct pmap_inval_info info;
	vm_object_t object;
	pmap_t obpmap;
	pmap_t *obpmapp;
	vm_offset_t b;
	pv_entry_t pte_pv;	/* in original or shared pmap */
	pv_entry_t pt_pv;	/* in original or shared pmap */
	pv_entry_t proc_pd_pv;	/* in original pmap */
	pv_entry_t proc_pt_pv;	/* in original pmap */
	pv_entry_t xpv;		/* PT in shared pmap */
	pd_entry_t *pt;		/* PT entry in PD of original pmap */
	pd_entry_t opte;	/* contents of *pt */
	pd_entry_t npte;	/* contents of *pt */
	vm_page_t m;

retry:
	/*
	 * Basic tests, require a non-NULL vm_map_entry, require proper
	 * alignment and type for the vm_map_entry, require that the
	 * underlying object already be allocated.
	 *
	 * We currently allow any type of object to use this optimization.
	 * The object itself does NOT have to be sized to a multiple of the
	 * segment size, but the memory mapping does.
	 */
	if (entry == NULL ||
	    pmap_mmu_optimize == 0 ||			/* not enabled */
	    ptepindex >= pmap_pd_pindex(0) ||		/* not terminal */
	    entry->inheritance != VM_INHERIT_SHARE ||	/* not shared */
	    entry->maptype != VM_MAPTYPE_NORMAL ||	/* weird map type */
	    entry->object.vm_object == NULL ||		/* needs VM object */
	    (entry->offset & SEG_MASK) ||		/* must be aligned */
	    (entry->start & SEG_MASK)) {
		return(pmap_allocpte(pmap, ptepindex, pvpp));
	}

	/*
	 * Make sure the full segment can be represented.
	 */
	b = va & ~(vm_offset_t)SEG_MASK;
	if (b < entry->start || b + SEG_SIZE > entry->end)
		return(pmap_allocpte(pmap, ptepindex, pvpp));

	/*
	 * If the full segment can be represented dive into the VM object's
	 * shared pmap, allocating as required.
	 */
	object = entry->object.vm_object;

	if (entry->protection & VM_PROT_WRITE)
		obpmapp = &object->md.pmap_rw;
	else
		obpmapp = &object->md.pmap_ro;

	/*
	 * We allocate what appears to be a normal pmap but because portions
	 * of this pmap are shared with other unrelated pmaps we have to
	 * set pm_active to point to all cpus.
	 *
	 * XXX Currently using pmap_spin to interlock the update, can't use
	 *     vm_object_hold/drop because the token might already be held
	 *     shared OR exclusive and we don't know.
	 */
	while ((obpmap = *obpmapp) == NULL) {
		obpmap = kmalloc(sizeof(*obpmap), M_OBJPMAP, M_WAITOK|M_ZERO);
		pmap_pinit_simple(obpmap);
		pmap_pinit2(obpmap);
		spin_lock(&pmap_spin);
		if (*obpmapp != NULL) {
			/*
			 * Handle race
			 */
			spin_unlock(&pmap_spin);
			pmap_release(obpmap);
			pmap_puninit(obpmap);
			kfree(obpmap, M_OBJPMAP);
		} else {
			obpmap->pm_active = smp_active_mask;
			*obpmapp = obpmap;
			spin_unlock(&pmap_spin);
		}
	}
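	/*
	 * At this point obpmap is the per-object shared pmap: one for
	 * read-only mappings and one for read-write mappings of the
	 * object.  The practical consequence (illustrative): a hundred
	 * processes mapping the same large shared segment end up sharing
	 * one set of page table pages instead of populating a hundred
	 * private copies.
	 */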

	/*
	 * Layering is: PTE, PT, PD, PDP, PML4.  We have to return the
	 * pte/pt using the shared pmap from the object but also adjust
	 * the process pmap's page table page as a side effect.
	 */

	/*
	 * Resolve the terminal PTE and PT in the shared pmap.  This is what
	 * we will return.  This is true if ptepindex represents a terminal
	 * page, otherwise pte_pv is actually the PT and pt_pv is actually
	 * the PD.
	 */
	pt_pv = NULL;
	pte_pv = pmap_allocpte(obpmap, ptepindex, &pt_pv);
	if (ptepindex >= pmap_pt_pindex(0))
		xpv = pte_pv;
	else
		xpv = pt_pv;

	/*
	 * Resolve the PD in the process pmap so we can properly share the
	 * page table page.  Lock order is bottom-up (leaf first)!
	 *
	 * NOTE: proc_pt_pv can be NULL.
	 */
	proc_pt_pv = pv_get(pmap, pmap_pt_pindex(b));
	proc_pd_pv = pmap_allocpte(pmap, pmap_pd_pindex(b), NULL);

	/*
	 * xpv is the page table page pv from the shared object
	 * (for convenience).
	 *
	 * Calculate the pte value for the PT to load into the process PD.
	 * If we have to change it we must properly dispose of the previous
	 * entry.
	 */
	pt = pv_pte_lookup(proc_pd_pv, pmap_pt_index(b));
	npte = VM_PAGE_TO_PHYS(xpv->pv_m) |
	       (PG_U | PG_RW | PG_V | PG_A | PG_M);

	/*
	 * Dispose of previous page table page if it was local to the
	 * process pmap.  If the old pt is not empty we cannot dispose of it
	 * until we clean it out.  This case should not arise very often so
	 * it is not optimized.
	 */
	if (proc_pt_pv) {
		if (proc_pt_pv->pv_m->wire_count != 1) {
			pv_put(proc_pd_pv);
			pv_put(proc_pt_pv);
			pv_put(pt_pv);
			pv_put(pte_pv);
			pmap_remove(pmap,
				    va & ~(vm_offset_t)SEG_MASK,
				    (va + SEG_SIZE) & ~(vm_offset_t)SEG_MASK);
			goto retry;
		}
		pmap_release_pv(proc_pt_pv, proc_pd_pv);
		proc_pt_pv = NULL;
		/* relookup */
		pt = pv_pte_lookup(proc_pd_pv, pmap_pt_index(b));
	}

	/*
	 * Handle remaining cases.
	 */
	if (*pt == 0) {
		*pt = npte;
		vm_page_wire_quick(xpv->pv_m);
		vm_page_wire_quick(proc_pd_pv->pv_m);
		atomic_add_long(&pmap->pm_stats.resident_count, 1);
	} else if (*pt != npte) {
		pmap_inval_init(&info);
		pmap_inval_interlock(&info, pmap, (vm_offset_t)-1);

		opte = pte_load_clear(pt);
		KKASSERT(opte && opte != npte);

		*pt = npte;
		vm_page_wire_quick(xpv->pv_m);	/* pgtable pg that is npte */

		/*
		 * Clean up opte, bump the wire_count for the process
		 * PD page representing the new entry if it was
		 * previously empty.
		 *
		 * If the entry was not previously empty and we have
		 * a PT in the proc pmap then opte must match that
		 * pt.  The proc pt must be retired (this is done
		 * later on in this procedure).
		 *
		 * NOTE: replacing valid pte, wire_count on proc_pd_pv
		 * stays the same.
		 */
		KKASSERT(opte & PG_V);
		m = PHYS_TO_VM_PAGE(opte & PG_FRAME);
		if (vm_page_unwire_quick(m)) {
			panic("pmap_allocpte_seg: "
			      "bad wire count %p",
			      m);
		}

		pmap_inval_deinterlock(&info, pmap);
		pmap_inval_done(&info);
	}

	/*
	 * The existing process page table was replaced and must be destroyed
	 * here.
	 */
	if (proc_pd_pv)
		pv_put(proc_pd_pv);
	if (pvpp)
		*pvpp = pt_pv;
	else
		pv_put(pt_pv);

	return (pte_pv);
}

/*
 * Release any resources held by the given physical map.
 *
 * Called when a pmap initialized by pmap_pinit is being released.  Should
 * only be called if the map contains no valid mappings.
 *
 * Caller must hold pmap->pm_token
 */
struct pmap_release_info {
	pmap_t	pmap;
	int	retry;
};

static int pmap_release_callback(pv_entry_t pv, void *data);

void
pmap_release(struct pmap *pmap)
{
	struct pmap_release_info info;

	KASSERT(pmap->pm_active == 0,
		("pmap still active! %016jx", (uintmax_t)pmap->pm_active));

	spin_lock(&pmap_spin);
	TAILQ_REMOVE(&pmap_list, pmap, pm_pmnode);
	spin_unlock(&pmap_spin);

	/*
	 * Pull pv's off the RB tree in order from low to high and release
	 * each page.
	 */
	info.pmap = pmap;
	do {
		info.retry = 0;
		spin_lock(&pmap->pm_spin);
		RB_SCAN(pv_entry_rb_tree, &pmap->pm_pvroot, NULL,
			pmap_release_callback, &info);
		spin_unlock(&pmap->pm_spin);
	} while (info.retry);


	/*
	 * One resident page (the pml4 page) should remain.
	 * No wired pages should remain.
	 */
	KKASSERT(pmap->pm_stats.resident_count ==
		 ((pmap->pm_flags & PMAP_FLAG_SIMPLE) ? 0 : 1));

	KKASSERT(pmap->pm_stats.wired_count == 0);
}

static int
pmap_release_callback(pv_entry_t pv, void *data)
{
	struct pmap_release_info *info = data;
	pmap_t pmap = info->pmap;
	int r;

	if (pv_hold_try(pv)) {
		spin_unlock(&pmap->pm_spin);
	} else {
		spin_unlock(&pmap->pm_spin);
		pv_lock(pv);
		if (pv->pv_pmap != pmap) {
			pv_put(pv);
			spin_lock(&pmap->pm_spin);
			info->retry = 1;
			return(-1);
		}
	}
	r = pmap_release_pv(pv, NULL);
	spin_lock(&pmap->pm_spin);
	return(r);
}
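/*
 * A note on the locking dance in pmap_release_callback() above:
 * pv_hold_try() is attempted while the pmap spinlock is held; when it
 * fails we must drop the spinlock before blocking in pv_lock(), and once
 * locked the pv may have been freed and reused, so pv_pmap is rechecked
 * and the whole RB scan is retried via info->retry if it no longer
 * matches.
 */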
1985 */ 1986 p = pmap_remove_pv_page(pv); 1987 vm_page_busy_wait(p, FALSE, "pmaprl"); 1988 if (p->wire_count != 1) { 1989 kprintf("pv_pindex %016lx: p->wire_count was %d\n", 1990 pv->pv_pindex, p->wire_count); 1991 } 1992 KKASSERT(p->wire_count == 1); 1993 KKASSERT(p->flags & PG_UNMANAGED); 1994 1995 vm_page_unwire(p, 0); 1996 KKASSERT(p->wire_count == 0); 1997 1998 /* 1999 * Theoretically this page, if not the pml4 page, should contain 2000 * all-zeros. But it's just too dangerous to mark it PG_ZERO. Free 2001 * normally. 2002 */ 2003 vm_page_free(p); 2004 skip: 2005 pv_free(pv); 2006 return 0; 2007 } 2008 2009 /* 2010 * This function will remove the pte associated with a pv from its parent. 2011 * Terminal pv's are supported. The removal will be interlocked if info 2012 * is non-NULL. The caller must dispose of pv instead of just unlocking 2013 * it. 2014 * 2015 * The wire count will be dropped on the parent page table. The wire 2016 * count on the page being removed (pv->pv_m) from the parent page table 2017 * is NOT touched. Note that terminal pages will not have any additional 2018 * wire counts while page table pages will have at least one representing 2019 * the mapping, plus others representing sub-mappings. 2020 * 2021 * NOTE: Cannot be called on kernel page table pages, only KVM terminal 2022 * pages and user page table and terminal pages. 2023 * 2024 * The pv must be locked. 2025 * 2026 * XXX must lock parent pv's if they exist to remove pte XXX 2027 */ 2028 static 2029 void 2030 pmap_remove_pv_pte(pv_entry_t pv, pv_entry_t pvp, struct pmap_inval_info *info) 2031 { 2032 vm_pindex_t ptepindex = pv->pv_pindex; 2033 pmap_t pmap = pv->pv_pmap; 2034 vm_page_t p; 2035 int gotpvp = 0; 2036 2037 KKASSERT(pmap); 2038 2039 if (ptepindex == pmap_pml4_pindex()) { 2040 /* 2041 * We are the top level pml4 table, there is no parent. 2042 */ 2043 p = pmap->pm_pmlpv->pv_m; 2044 } else if (ptepindex >= pmap_pdp_pindex(0)) { 2045 /* 2046 * Remove a PDP page from the pml4e. This can only occur 2047 * with user page tables. We do not have to lock the 2048 * pml4 PV so just ignore pvp. 2049 */ 2050 vm_pindex_t pml4_pindex; 2051 vm_pindex_t pdp_index; 2052 pml4_entry_t *pdp; 2053 2054 pdp_index = ptepindex - pmap_pdp_pindex(0); 2055 if (pvp == NULL) { 2056 pml4_pindex = pmap_pml4_pindex(); 2057 pvp = pv_get(pv->pv_pmap, pml4_pindex); 2058 KKASSERT(pvp); 2059 gotpvp = 1; 2060 } 2061 pdp = &pmap->pm_pml4[pdp_index & ((1ul << NPML4EPGSHIFT) - 1)]; 2062 KKASSERT((*pdp & PG_V) != 0); 2063 p = PHYS_TO_VM_PAGE(*pdp & PG_FRAME); 2064 *pdp = 0; 2065 KKASSERT(info == NULL); 2066 } else if (ptepindex >= pmap_pd_pindex(0)) { 2067 /* 2068 * Remove a PD page from the pdp 2069 * 2070 * SIMPLE PMAP NOTE: Non-existent pvp's are ok in the case 2071 * of a simple pmap because it stops at 2072 * the PD page.
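 *
 * (Illustrative aside on the layout assumed by the offset math
 * in this function: the pv_pindex space is linear, with pindices
 * below NUPTE_TOTAL naming terminal PTE pv's, the next NUPT_TOTAL
 * naming PT pages, the next NUPD_TOTAL naming PD pages, then the
 * PDP pages, and finally the single pml4 pv, hence parent lookups
 * of the form NUPTE_TOTAL + NUPT_TOTAL + ... + index below.)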
2073 */ 2074 vm_pindex_t pdp_pindex; 2075 vm_pindex_t pd_index; 2076 pdp_entry_t *pd; 2077 2078 pd_index = ptepindex - pmap_pd_pindex(0); 2079 2080 if (pvp == NULL) { 2081 pdp_pindex = NUPTE_TOTAL + NUPT_TOTAL + NUPD_TOTAL + 2082 (pd_index >> NPML4EPGSHIFT); 2083 pvp = pv_get(pv->pv_pmap, pdp_pindex); 2084 if (pvp) 2085 gotpvp = 1; 2086 } 2087 if (pvp) { 2088 pd = pv_pte_lookup(pvp, pd_index & 2089 ((1ul << NPDPEPGSHIFT) - 1)); 2090 KKASSERT((*pd & PG_V) != 0); 2091 p = PHYS_TO_VM_PAGE(*pd & PG_FRAME); 2092 *pd = 0; 2093 } else { 2094 KKASSERT(pmap->pm_flags & PMAP_FLAG_SIMPLE); 2095 p = pv->pv_m; /* degenerate test later */ 2096 } 2097 KKASSERT(info == NULL); 2098 } else if (ptepindex >= pmap_pt_pindex(0)) { 2099 /* 2100 * Remove a PT page from the pd 2101 */ 2102 vm_pindex_t pd_pindex; 2103 vm_pindex_t pt_index; 2104 pd_entry_t *pt; 2105 2106 pt_index = ptepindex - pmap_pt_pindex(0); 2107 2108 if (pvp == NULL) { 2109 pd_pindex = NUPTE_TOTAL + NUPT_TOTAL + 2110 (pt_index >> NPDPEPGSHIFT); 2111 pvp = pv_get(pv->pv_pmap, pd_pindex); 2112 KKASSERT(pvp); 2113 gotpvp = 1; 2114 } 2115 pt = pv_pte_lookup(pvp, pt_index & ((1ul << NPDPEPGSHIFT) - 1)); 2116 KKASSERT((*pt & PG_V) != 0); 2117 p = PHYS_TO_VM_PAGE(*pt & PG_FRAME); 2118 *pt = 0; 2119 KKASSERT(info == NULL); 2120 } else { 2121 /* 2122 * Remove a PTE from the PT page 2123 * 2124 * NOTE: pv's must be locked bottom-up to avoid deadlocking. 2125 * pv is a pte_pv so we can safely lock pt_pv. 2126 */ 2127 vm_pindex_t pt_pindex; 2128 pt_entry_t *ptep; 2129 pt_entry_t pte; 2130 vm_offset_t va; 2131 2132 pt_pindex = ptepindex >> NPTEPGSHIFT; 2133 va = (vm_offset_t)ptepindex << PAGE_SHIFT; 2134 2135 if (ptepindex >= NUPTE_USER) { 2136 ptep = vtopte(ptepindex << PAGE_SHIFT); 2137 KKASSERT(pvp == NULL); 2138 } else { 2139 if (pvp == NULL) { 2140 pt_pindex = NUPTE_TOTAL + 2141 (ptepindex >> NPDPEPGSHIFT); 2142 pvp = pv_get(pv->pv_pmap, pt_pindex); 2143 KKASSERT(pvp); 2144 gotpvp = 1; 2145 } 2146 ptep = pv_pte_lookup(pvp, ptepindex & 2147 ((1ul << NPDPEPGSHIFT) - 1)); 2148 } 2149 2150 if (info) 2151 pmap_inval_interlock(info, pmap, va); 2152 pte = pte_load_clear(ptep); 2153 if (info) 2154 pmap_inval_deinterlock(info, pmap); 2155 else 2156 cpu_invlpg((void *)va); 2157 2158 /* 2159 * Now update the vm_page_t 2160 */ 2161 if ((pte & (PG_MANAGED|PG_V)) != (PG_MANAGED|PG_V)) { 2162 kprintf("remove_pte badpte %016lx %016lx %d\n", 2163 pte, pv->pv_pindex, 2164 pv->pv_pindex < pmap_pt_pindex(0)); 2165 } 2166 /*KKASSERT((pte & (PG_MANAGED|PG_V)) == (PG_MANAGED|PG_V));*/ 2167 p = PHYS_TO_VM_PAGE(pte & PG_FRAME); 2168 2169 if (pte & PG_M) { 2170 if (pmap_track_modified(ptepindex)) 2171 vm_page_dirty(p); 2172 } 2173 if (pte & PG_A) { 2174 vm_page_flag_set(p, PG_REFERENCED); 2175 } 2176 if (pte & PG_W) 2177 atomic_add_long(&pmap->pm_stats.wired_count, -1); 2178 if (pte & PG_G) 2179 cpu_invlpg((void *)va); 2180 } 2181 2182 /* 2183 * Unwire the parent page table page. The wire_count cannot go below 2184 * 1 here because the parent page table page is itself still mapped. 2185 * 2186 * XXX remove the assertions later. 
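 *
 * (This relies on the invariant stated in the header above: a
 * page table page carries one wire_count for its own mapping plus
 * one per child entry, so dropping a single child cannot zero it.)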
2187 */ 2188 KKASSERT(pv->pv_m == p); 2189 if (pvp && vm_page_unwire_quick(pvp->pv_m)) 2190 panic("pmap_remove_pv_pte: Insufficient wire_count"); 2191 2192 if (gotpvp) 2193 pv_put(pvp); 2194 } 2195 2196 static 2197 vm_page_t 2198 pmap_remove_pv_page(pv_entry_t pv) 2199 { 2200 vm_page_t m; 2201 2202 m = pv->pv_m; 2203 KKASSERT(m); 2204 vm_page_spin_lock(m); 2205 pv->pv_m = NULL; 2206 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2207 /* 2208 if (m->object) 2209 atomic_add_int(&m->object->agg_pv_list_count, -1); 2210 */ 2211 if (TAILQ_EMPTY(&m->md.pv_list)) 2212 vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE); 2213 vm_page_spin_unlock(m); 2214 return(m); 2215 } 2216 2217 /* 2218 * Grow the number of kernel page table entries, if needed. 2219 * 2220 * This routine is always called to validate any address space 2221 * beyond KERNBASE (for kldloads). kernel_vm_end only governs the address 2222 * space below KERNBASE. 2223 */ 2224 void 2225 pmap_growkernel(vm_offset_t kstart, vm_offset_t kend) 2226 { 2227 vm_paddr_t paddr; 2228 vm_offset_t ptppaddr; 2229 vm_page_t nkpg; 2230 pd_entry_t *pt, newpt; 2231 pdp_entry_t newpd; 2232 int update_kernel_vm_end; 2233 2234 /* 2235 * bootstrap kernel_vm_end on first real VM use 2236 */ 2237 if (kernel_vm_end == 0) { 2238 kernel_vm_end = VM_MIN_KERNEL_ADDRESS; 2239 nkpt = 0; 2240 while ((*pmap_pt(&kernel_pmap, kernel_vm_end) & PG_V) != 0) { 2241 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & 2242 ~(PAGE_SIZE * NPTEPG - 1); 2243 nkpt++; 2244 if (kernel_vm_end - 1 >= kernel_map.max_offset) { 2245 kernel_vm_end = kernel_map.max_offset; 2246 break; 2247 } 2248 } 2249 } 2250 2251 /* 2252 * Fill in the gaps. kernel_vm_end is only adjusted for ranges 2253 * below KERNBASE. Ranges above KERNBASE are kldloaded and we 2254 * do not want to force-fill 128G worth of page tables. 
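 *
 * (Worked example, illustrative: with 4KB pages and NPTEPG == 512
 * a page table page maps 2MB, so the rounddown2()/roundup2() calls
 * below align [kstart, kend) to 2MB boundaries and the loop then
 * advances one page table page (2MB) per iteration.)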
2255 */ 2256 if (kstart < KERNBASE) { 2257 if (kstart > kernel_vm_end) 2258 kstart = kernel_vm_end; 2259 KKASSERT(kend <= KERNBASE); 2260 update_kernel_vm_end = 1; 2261 } else { 2262 update_kernel_vm_end = 0; 2263 } 2264 2265 kstart = rounddown2(kstart, PAGE_SIZE * NPTEPG); 2266 kend = roundup2(kend, PAGE_SIZE * NPTEPG); 2267 2268 if (kend - 1 >= kernel_map.max_offset) 2269 kend = kernel_map.max_offset; 2270 2271 while (kstart < kend) { 2272 pt = pmap_pt(&kernel_pmap, kstart); 2273 if (pt == NULL) { 2274 /* We need a new PDP entry */ 2275 nkpg = vm_page_alloc(NULL, nkpt, 2276 VM_ALLOC_NORMAL | 2277 VM_ALLOC_SYSTEM | 2278 VM_ALLOC_INTERRUPT); 2279 if (nkpg == NULL) { 2280 panic("pmap_growkernel: no memory to grow " 2281 "kernel"); 2282 } 2283 paddr = VM_PAGE_TO_PHYS(nkpg); 2284 if ((nkpg->flags & PG_ZERO) == 0) 2285 pmap_zero_page(paddr); 2286 vm_page_flag_clear(nkpg, PG_ZERO); 2287 newpd = (pdp_entry_t) 2288 (paddr | PG_V | PG_RW | PG_A | PG_M); 2289 *pmap_pd(&kernel_pmap, kstart) = newpd; 2290 nkpt++; 2291 continue; /* try again */ 2292 } 2293 if ((*pt & PG_V) != 0) { 2294 kstart = (kstart + PAGE_SIZE * NPTEPG) & 2295 ~(PAGE_SIZE * NPTEPG - 1); 2296 if (kstart - 1 >= kernel_map.max_offset) { 2297 kstart = kernel_map.max_offset; 2298 break; 2299 } 2300 continue; 2301 } 2302 2303 /* 2304 * This index is bogus, but out of the way 2305 */ 2306 nkpg = vm_page_alloc(NULL, nkpt, 2307 VM_ALLOC_NORMAL | 2308 VM_ALLOC_SYSTEM | 2309 VM_ALLOC_INTERRUPT); 2310 if (nkpg == NULL) 2311 panic("pmap_growkernel: no memory to grow kernel"); 2312 2313 vm_page_wire(nkpg); 2314 ptppaddr = VM_PAGE_TO_PHYS(nkpg); 2315 pmap_zero_page(ptppaddr); 2316 vm_page_flag_clear(nkpg, PG_ZERO); 2317 newpt = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); 2318 *pmap_pt(&kernel_pmap, kstart) = newpt; 2319 nkpt++; 2320 2321 kstart = (kstart + PAGE_SIZE * NPTEPG) & 2322 ~(PAGE_SIZE * NPTEPG - 1); 2323 2324 if (kstart - 1 >= kernel_map.max_offset) { 2325 kstart = kernel_map.max_offset; 2326 break; 2327 } 2328 } 2329 2330 /* 2331 * Only update kernel_vm_end for areas below KERNBASE. 2332 */ 2333 if (update_kernel_vm_end && kernel_vm_end < kstart) 2334 kernel_vm_end = kstart; 2335 } 2336 2337 /* 2338 * Add a reference to the specified pmap. 2339 */ 2340 void 2341 pmap_reference(pmap_t pmap) 2342 { 2343 if (pmap != NULL) { 2344 lwkt_gettoken(&pmap->pm_token); 2345 ++pmap->pm_count; 2346 lwkt_reltoken(&pmap->pm_token); 2347 } 2348 } 2349 2350 /*************************************************** 2351 * page management routines. 2352 ***************************************************/ 2353 2354 /* 2355 * Hold a pv without locking it 2356 */ 2357 static void 2358 pv_hold(pv_entry_t pv) 2359 { 2360 u_int count; 2361 2362 if (atomic_cmpset_int(&pv->pv_hold, 0, 1)) 2363 return; 2364 2365 for (;;) { 2366 count = pv->pv_hold; 2367 cpu_ccfence(); 2368 if (atomic_cmpset_int(&pv->pv_hold, count, count + 1)) 2369 return; 2370 /* retry */ 2371 } 2372 } 2373 2374 /* 2375 * Hold a pv_entry, preventing its destruction. TRUE is returned if the pv 2376 * was successfully locked, FALSE if it wasn't. The caller must dispose of 2377 * the pv properly. 2378 * 2379 * Either the pmap->pm_spin or the related vm_page_spin (if traversing a 2380 * pv list via its page) must be held by the caller. 
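 *
 * (Sketch of the pv_hold encoding manipulated below: a hold count
 * in the PV_HOLD_MASK bits plus the PV_HOLD_LOCKED and
 * PV_HOLD_WAITING flags, always updated via cmpset loops such as:
 *
 *	count = pv->pv_hold;
 *	if ((count & PV_HOLD_LOCKED) == 0 &&
 *	    atomic_cmpset_int(&pv->pv_hold, count,
 *			      (count + 1) | PV_HOLD_LOCKED))
 *		...pv is now held and locked...
 * )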
2381 */ 2382 static int 2383 _pv_hold_try(pv_entry_t pv PMAP_DEBUG_DECL) 2384 { 2385 u_int count; 2386 2387 if (atomic_cmpset_int(&pv->pv_hold, 0, PV_HOLD_LOCKED | 1)) { 2388 #ifdef PMAP_DEBUG 2389 pv->pv_func = func; 2390 pv->pv_line = lineno; 2391 #endif 2392 return TRUE; 2393 } 2394 2395 for (;;) { 2396 count = pv->pv_hold; 2397 cpu_ccfence(); 2398 if ((count & PV_HOLD_LOCKED) == 0) { 2399 if (atomic_cmpset_int(&pv->pv_hold, count, 2400 (count + 1) | PV_HOLD_LOCKED)) { 2401 #ifdef PMAP_DEBUG 2402 pv->pv_func = func; 2403 pv->pv_line = lineno; 2404 #endif 2405 return TRUE; 2406 } 2407 } else { 2408 if (atomic_cmpset_int(&pv->pv_hold, count, count + 1)) 2409 return FALSE; 2410 } 2411 /* retry */ 2412 } 2413 } 2414 2415 /* 2416 * Drop a previously held pv_entry which could not be locked, allowing its 2417 * destruction. 2418 * 2419 * Must not be called with a spinlock held as we might zfree() the pv if it 2420 * is no longer associated with a pmap and this was the last hold count. 2421 */ 2422 static void 2423 pv_drop(pv_entry_t pv) 2424 { 2425 u_int count; 2426 2427 if (atomic_cmpset_int(&pv->pv_hold, 1, 0)) { 2428 if (pv->pv_pmap == NULL) 2429 zfree(pvzone, pv); 2430 return; 2431 } 2432 2433 for (;;) { 2434 count = pv->pv_hold; 2435 cpu_ccfence(); 2436 KKASSERT((count & PV_HOLD_MASK) > 0); 2437 KKASSERT((count & (PV_HOLD_LOCKED | PV_HOLD_MASK)) != 2438 (PV_HOLD_LOCKED | 1)); 2439 if (atomic_cmpset_int(&pv->pv_hold, count, count - 1)) { 2440 if (count == 1 && pv->pv_pmap == NULL) 2441 zfree(pvzone, pv); 2442 return; 2443 } 2444 /* retry */ 2445 } 2446 } 2447 2448 /* 2449 * Find or allocate the requested PV entry, returning a locked pv 2450 */ 2451 static 2452 pv_entry_t 2453 _pv_alloc(pmap_t pmap, vm_pindex_t pindex, int *isnew PMAP_DEBUG_DECL) 2454 { 2455 pv_entry_t pv; 2456 pv_entry_t pnew = NULL; 2457 2458 spin_lock(&pmap->pm_spin); 2459 for (;;) { 2460 if ((pv = pmap->pm_pvhint) == NULL || pv->pv_pindex != pindex) { 2461 pv = pv_entry_rb_tree_RB_LOOKUP(&pmap->pm_pvroot, 2462 pindex); 2463 } 2464 if (pv == NULL) { 2465 if (pnew == NULL) { 2466 spin_unlock(&pmap->pm_spin); 2467 pnew = zalloc(pvzone); 2468 spin_lock(&pmap->pm_spin); 2469 continue; 2470 } 2471 pnew->pv_pmap = pmap; 2472 pnew->pv_pindex = pindex; 2473 pnew->pv_hold = PV_HOLD_LOCKED | 1; 2474 #ifdef PMAP_DEBUG 2475 pnew->pv_func = func; 2476 pnew->pv_line = lineno; 2477 #endif 2478 pv_entry_rb_tree_RB_INSERT(&pmap->pm_pvroot, pnew); 2479 atomic_add_long(&pmap->pm_stats.resident_count, 1); 2480 spin_unlock(&pmap->pm_spin); 2481 *isnew = 1; 2482 return(pnew); 2483 } 2484 if (pnew) { 2485 spin_unlock(&pmap->pm_spin); 2486 zfree(pvzone, pnew); 2487 pnew = NULL; 2488 spin_lock(&pmap->pm_spin); 2489 continue; 2490 } 2491 if (_pv_hold_try(pv PMAP_DEBUG_COPY)) { 2492 spin_unlock(&pmap->pm_spin); 2493 *isnew = 0; 2494 return(pv); 2495 } 2496 spin_unlock(&pmap->pm_spin); 2497 _pv_lock(pv PMAP_DEBUG_COPY); 2498 if (pv->pv_pmap == pmap && pv->pv_pindex == pindex) { 2499 *isnew = 0; 2500 return(pv); 2501 } 2502 pv_put(pv); 2503 spin_lock(&pmap->pm_spin); 2504 } 2505 2506 2507 } 2508 2509 /* 2510 * Find the requested PV entry, returning a locked+held pv or NULL 2511 */ 2512 static 2513 pv_entry_t 2514 _pv_get(pmap_t pmap, vm_pindex_t pindex PMAP_DEBUG_DECL) 2515 { 2516 pv_entry_t pv; 2517 2518 spin_lock(&pmap->pm_spin); 2519 for (;;) { 2520 /* 2521 * Shortcut cache 2522 */ 2523 if ((pv = pmap->pm_pvhint) == NULL || pv->pv_pindex != pindex) { 2524 pv = pv_entry_rb_tree_RB_LOOKUP(&pmap->pm_pvroot, 2525 pindex); 2526 } 2527 if (pv == NULL) 
{ 2528 spin_unlock(&pmap->pm_spin); 2529 return NULL; 2530 } 2531 if (_pv_hold_try(pv PMAP_DEBUG_COPY)) { 2532 pv_cache(pv, pindex); 2533 spin_unlock(&pmap->pm_spin); 2534 return(pv); 2535 } 2536 spin_unlock(&pmap->pm_spin); 2537 _pv_lock(pv PMAP_DEBUG_COPY); 2538 if (pv->pv_pmap == pmap && pv->pv_pindex == pindex) 2539 return(pv); 2540 pv_put(pv); 2541 spin_lock(&pmap->pm_spin); 2542 } 2543 } 2544 2545 /* 2546 * Lookup, hold, and attempt to lock (pmap,pindex). 2547 * 2548 * If the entry does not exist NULL is returned and *errorp is set to 0 2549 * 2550 * If the entry exists and could be successfully locked it is returned and 2551 * errorp is set to 0. 2552 * 2553 * If the entry exists but could NOT be successfully locked it is returned 2554 * held and *errorp is set to 1. 2555 */ 2556 static 2557 pv_entry_t 2558 pv_get_try(pmap_t pmap, vm_pindex_t pindex, int *errorp) 2559 { 2560 pv_entry_t pv; 2561 2562 spin_lock(&pmap->pm_spin); 2563 if ((pv = pmap->pm_pvhint) == NULL || pv->pv_pindex != pindex) 2564 pv = pv_entry_rb_tree_RB_LOOKUP(&pmap->pm_pvroot, pindex); 2565 if (pv == NULL) { 2566 spin_unlock(&pmap->pm_spin); 2567 *errorp = 0; 2568 return NULL; 2569 } 2570 if (pv_hold_try(pv)) { 2571 pv_cache(pv, pindex); 2572 spin_unlock(&pmap->pm_spin); 2573 *errorp = 0; 2574 return(pv); /* lock succeeded */ 2575 } 2576 spin_unlock(&pmap->pm_spin); 2577 *errorp = 1; 2578 return (pv); /* lock failed */ 2579 } 2580 2581 /* 2582 * Find the requested PV entry, returning a held pv or NULL 2583 */ 2584 static 2585 pv_entry_t 2586 pv_find(pmap_t pmap, vm_pindex_t pindex) 2587 { 2588 pv_entry_t pv; 2589 2590 spin_lock(&pmap->pm_spin); 2591 2592 if ((pv = pmap->pm_pvhint) == NULL || pv->pv_pindex != pindex) 2593 pv = pv_entry_rb_tree_RB_LOOKUP(&pmap->pm_pvroot, pindex); 2594 if (pv == NULL) { 2595 spin_unlock(&pmap->pm_spin); 2596 return NULL; 2597 } 2598 pv_hold(pv); 2599 pv_cache(pv, pindex); 2600 spin_unlock(&pmap->pm_spin); 2601 return(pv); 2602 } 2603 2604 /* 2605 * Lock a held pv, keeping the hold count 2606 */ 2607 static 2608 void 2609 _pv_lock(pv_entry_t pv PMAP_DEBUG_DECL) 2610 { 2611 u_int count; 2612 2613 for (;;) { 2614 count = pv->pv_hold; 2615 cpu_ccfence(); 2616 if ((count & PV_HOLD_LOCKED) == 0) { 2617 if (atomic_cmpset_int(&pv->pv_hold, count, 2618 count | PV_HOLD_LOCKED)) { 2619 #ifdef PMAP_DEBUG 2620 pv->pv_func = func; 2621 pv->pv_line = lineno; 2622 #endif 2623 return; 2624 } 2625 continue; 2626 } 2627 tsleep_interlock(pv, 0); 2628 if (atomic_cmpset_int(&pv->pv_hold, count, 2629 count | PV_HOLD_WAITING)) { 2630 #ifdef PMAP_DEBUG 2631 kprintf("pv waiting on %s:%d\n", 2632 pv->pv_func, pv->pv_line); 2633 #endif 2634 tsleep(pv, PINTERLOCKED, "pvwait", hz); 2635 } 2636 /* retry */ 2637 } 2638 } 2639 2640 /* 2641 * Unlock a held and locked pv, keeping the hold count. 2642 */ 2643 static 2644 void 2645 pv_unlock(pv_entry_t pv) 2646 { 2647 u_int count; 2648 2649 if (atomic_cmpset_int(&pv->pv_hold, PV_HOLD_LOCKED | 1, 1)) 2650 return; 2651 2652 for (;;) { 2653 count = pv->pv_hold; 2654 cpu_ccfence(); 2655 KKASSERT((count & (PV_HOLD_LOCKED|PV_HOLD_MASK)) >= 2656 (PV_HOLD_LOCKED | 1)); 2657 if (atomic_cmpset_int(&pv->pv_hold, count, 2658 count & 2659 ~(PV_HOLD_LOCKED | PV_HOLD_WAITING))) { 2660 if (count & PV_HOLD_WAITING) 2661 wakeup(pv); 2662 break; 2663 } 2664 } 2665 } 2666 2667 /* 2668 * Unlock and drop a pv. If the pv is no longer associated with a pmap 2669 * and the hold count drops to zero we will free it. 2670 * 2671 * Caller should not hold any spin locks. 
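 * (A typical pairing, as an illustrative sketch:
 *
 *	pv = pv_get(pmap, pindex);	(returns a held+locked pv, or NULL)
 *	if (pv) {
 *		...examine or modify the pte...
 *		pv_put(pv);		(unlock, drop, possibly free)
 *	})
 *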
We are protected from hold races 2672 * by virtue of holds occurring only with a pmap_spin or vm_page_spin 2673 * lock held. A pv cannot be located otherwise. 2674 */ 2675 static 2676 void 2677 pv_put(pv_entry_t pv) 2678 { 2679 if (atomic_cmpset_int(&pv->pv_hold, PV_HOLD_LOCKED | 1, 0)) { 2680 if (pv->pv_pmap == NULL) 2681 zfree(pvzone, pv); 2682 return; 2683 } 2684 pv_unlock(pv); 2685 pv_drop(pv); 2686 } 2687 2688 /* 2689 * Unlock, drop, and free a pv, destroying it. The pv is removed from its 2690 * pmap. Any pte operations must have already been completed. 2691 */ 2692 static 2693 void 2694 pv_free(pv_entry_t pv) 2695 { 2696 pmap_t pmap; 2697 2698 KKASSERT(pv->pv_m == NULL); 2699 if ((pmap = pv->pv_pmap) != NULL) { 2700 spin_lock(&pmap->pm_spin); 2701 pv_entry_rb_tree_RB_REMOVE(&pmap->pm_pvroot, pv); 2702 if (pmap->pm_pvhint == pv) 2703 pmap->pm_pvhint = NULL; 2704 atomic_add_long(&pmap->pm_stats.resident_count, -1); 2705 pv->pv_pmap = NULL; 2706 pv->pv_pindex = 0; 2707 spin_unlock(&pmap->pm_spin); 2708 } 2709 pv_put(pv); 2710 } 2711 2712 /* 2713 * This routine is very drastic, but can save the system 2714 * in a pinch. 2715 */ 2716 void 2717 pmap_collect(void) 2718 { 2719 int i; 2720 vm_page_t m; 2721 static int warningdone=0; 2722 2723 if (pmap_pagedaemon_waken == 0) 2724 return; 2725 pmap_pagedaemon_waken = 0; 2726 if (warningdone < 5) { 2727 kprintf("pmap_collect: collecting pv entries -- " 2728 "suggest increasing PMAP_SHPGPERPROC\n"); 2729 warningdone++; 2730 } 2731 2732 for (i = 0; i < vm_page_array_size; i++) { 2733 m = &vm_page_array[i]; 2734 if (m->wire_count || m->hold_count) 2735 continue; 2736 if (vm_page_busy_try(m, TRUE) == 0) { 2737 if (m->wire_count == 0 && m->hold_count == 0) { 2738 pmap_remove_all(m); 2739 } 2740 vm_page_wakeup(m); 2741 } 2742 } 2743 } 2744 2745 /* 2746 * Scan the pmap for active page table entries and issue a callback. 2747 * The callback must dispose of pte_pv, whose PTE entry is at *ptep in 2748 * its parent page table. 2749 * 2750 * pte_pv will be NULL if the page or page table is unmanaged. 2751 * pt_pv will point to the page table page containing the pte for the page. 2752 * 2753 * NOTE! If we come across an unmanaged page TABLE (versus an unmanaged page), 2754 * we pass a NULL pte_pv and we pass a pt_pv pointing to the passed 2755 * process pmap's PD and page to the callback function. This can be 2756 * confusing because the pt_pv is really a pd_pv, and the target page 2757 * table page is simply aliased by the pmap and not owned by it. 2758 * 2759 * It is assumed that the start and end are properly rounded to the page size. 2760 * 2761 * It is assumed that PD pages and above are managed and thus in the RB tree, 2762 * allowing us to use RB_SCAN from the PD pages down for ranged scans.
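 *
 * (pmap_remove() and pmap_protect() below are the canonical users;
 * each fills in a pmap_scan_info with its own callback and then
 * invokes pmap_scan().)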
2763 */ 2764 struct pmap_scan_info { 2765 struct pmap *pmap; 2766 vm_offset_t sva; 2767 vm_offset_t eva; 2768 vm_pindex_t sva_pd_pindex; 2769 vm_pindex_t eva_pd_pindex; 2770 void (*func)(pmap_t, struct pmap_scan_info *, 2771 pv_entry_t, pv_entry_t, int, vm_offset_t, 2772 pt_entry_t *, void *); 2773 void *arg; 2774 int doinval; 2775 struct pmap_inval_info inval; 2776 }; 2777 2778 static int pmap_scan_cmp(pv_entry_t pv, void *data); 2779 static int pmap_scan_callback(pv_entry_t pv, void *data); 2780 2781 static void 2782 pmap_scan(struct pmap_scan_info *info) 2783 { 2784 struct pmap *pmap = info->pmap; 2785 pv_entry_t pd_pv; /* A page directory PV */ 2786 pv_entry_t pt_pv; /* A page table PV */ 2787 pv_entry_t pte_pv; /* A page table entry PV */ 2788 pt_entry_t *ptep; 2789 struct pv_entry dummy_pv; 2790 2791 if (pmap == NULL) 2792 return; 2793 2794 /* 2795 * Hold the token for stability; if the pmap is empty we have nothing 2796 * to do. 2797 */ 2798 lwkt_gettoken(&pmap->pm_token); 2799 #if 0 2800 if (pmap->pm_stats.resident_count == 0) { 2801 lwkt_reltoken(&pmap->pm_token); 2802 return; 2803 } 2804 #endif 2805 2806 pmap_inval_init(&info->inval); 2807 2808 /* 2809 * Special handling for scanning one page, which is a very common 2810 * operation (it is?). 2811 * 2812 * NOTE: Locks must be ordered bottom-up. pte,pt,pd,pdp,pml4 2813 */ 2814 if (info->sva + PAGE_SIZE == info->eva) { 2815 if (info->sva >= VM_MAX_USER_ADDRESS) { 2816 /* 2817 * Kernel mappings do not track wire counts on 2818 * page table pages and only maintain pd_pv and 2819 * pte_pv levels so pmap_scan() works. 2820 */ 2821 pt_pv = NULL; 2822 pte_pv = pv_get(pmap, pmap_pte_pindex(info->sva)); 2823 ptep = vtopte(info->sva); 2824 } else { 2825 /* 2826 * User pages which are unmanaged will not have a 2827 * pte_pv. User page table pages which are unmanaged 2828 * (shared from elsewhere) will also not have a pt_pv. 2829 * The func() callback will pass both pte_pv and pt_pv 2830 * as NULL in that case. 2831 */ 2832 pte_pv = pv_get(pmap, pmap_pte_pindex(info->sva)); 2833 pt_pv = pv_get(pmap, pmap_pt_pindex(info->sva)); 2834 if (pt_pv == NULL) { 2835 KKASSERT(pte_pv == NULL); 2836 pd_pv = pv_get(pmap, pmap_pd_pindex(info->sva)); 2837 if (pd_pv) { 2838 ptep = pv_pte_lookup(pd_pv, 2839 pmap_pt_index(info->sva)); 2840 if (*ptep) { 2841 info->func(pmap, info, 2842 NULL, pd_pv, 1, 2843 info->sva, ptep, 2844 info->arg); 2845 } 2846 pv_put(pd_pv); 2847 } 2848 goto fast_skip; 2849 } 2850 ptep = pv_pte_lookup(pt_pv, pmap_pte_index(info->sva)); 2851 } 2852 if (*ptep == 0) { 2853 /* 2854 * Unlike the pv_find() case below we actually 2855 * acquired a locked pv in this case so any 2856 * race should have been resolved. It is expected 2857 * to not exist. 2858 */ 2859 KKASSERT(pte_pv == NULL); 2860 } else if (pte_pv) { 2861 KASSERT((*ptep & (PG_MANAGED|PG_V)) == (PG_MANAGED| 2862 PG_V), 2863 ("bad *ptep %016lx sva %016lx pte_pv %p", 2864 *ptep, info->sva, pte_pv)); 2865 info->func(pmap, info, pte_pv, pt_pv, 0, 2866 info->sva, ptep, info->arg); 2867 } else { 2868 KASSERT((*ptep & (PG_MANAGED|PG_V)) == PG_V, 2869 ("bad *ptep %016lx sva %016lx pte_pv NULL", 2870 *ptep, info->sva)); 2871 info->func(pmap, info, NULL, pt_pv, 0, 2872 info->sva, ptep, info->arg); 2873 } 2874 if (pt_pv) 2875 pv_put(pt_pv); 2876 fast_skip: 2877 pmap_inval_done(&info->inval); 2878 lwkt_reltoken(&pmap->pm_token); 2879 return; 2880 } 2881 2882 /* 2883 * Nominal scan case, RB_SCAN() for PD pages and iterate from 2884 * there. 
2885 */ 2886 info->sva_pd_pindex = pmap_pd_pindex(info->sva); 2887 info->eva_pd_pindex = pmap_pd_pindex(info->eva + NBPDP - 1); 2888 2889 if (info->sva >= VM_MAX_USER_ADDRESS) { 2890 /* 2891 * The kernel does not currently maintain any pv_entry's for 2892 * higher-level page tables. 2893 */ 2894 bzero(&dummy_pv, sizeof(dummy_pv)); 2895 dummy_pv.pv_pindex = info->sva_pd_pindex; 2896 spin_lock(&pmap->pm_spin); 2897 while (dummy_pv.pv_pindex < info->eva_pd_pindex) { 2898 pmap_scan_callback(&dummy_pv, info); 2899 ++dummy_pv.pv_pindex; 2900 } 2901 spin_unlock(&pmap->pm_spin); 2902 } else { 2903 /* 2904 * User page tables maintain local PML4, PDP, and PD 2905 * pv_entry's at the very least. PT pv's might be 2906 * unmanaged and thus not exist. PTE pv's might be 2907 * unmanaged and thus not exist. 2908 */ 2909 spin_lock(&pmap->pm_spin); 2910 pv_entry_rb_tree_RB_SCAN(&pmap->pm_pvroot, 2911 pmap_scan_cmp, pmap_scan_callback, info); 2912 spin_unlock(&pmap->pm_spin); 2913 } 2914 pmap_inval_done(&info->inval); 2915 lwkt_reltoken(&pmap->pm_token); 2916 } 2917 2918 /* 2919 * WARNING! pmap->pm_spin held 2920 */ 2921 static int 2922 pmap_scan_cmp(pv_entry_t pv, void *data) 2923 { 2924 struct pmap_scan_info *info = data; 2925 if (pv->pv_pindex < info->sva_pd_pindex) 2926 return(-1); 2927 if (pv->pv_pindex >= info->eva_pd_pindex) 2928 return(1); 2929 return(0); 2930 } 2931 2932 /* 2933 * WARNING! pmap->pm_spin held 2934 */ 2935 static int 2936 pmap_scan_callback(pv_entry_t pv, void *data) 2937 { 2938 struct pmap_scan_info *info = data; 2939 struct pmap *pmap = info->pmap; 2940 pv_entry_t pd_pv; /* A page directory PV */ 2941 pv_entry_t pt_pv; /* A page table PV */ 2942 pv_entry_t pte_pv; /* A page table entry PV */ 2943 pt_entry_t *ptep; 2944 vm_offset_t sva; 2945 vm_offset_t eva; 2946 vm_offset_t va_next; 2947 vm_pindex_t pd_pindex; 2948 int error; 2949 2950 /* 2951 * Pull the PD pindex from the pv before releasing the spinlock. 2952 * 2953 * WARNING: pv is faked for kernel pmap scans. 2954 */ 2955 pd_pindex = pv->pv_pindex; 2956 spin_unlock(&pmap->pm_spin); 2957 pv = NULL; /* invalid after spinlock unlocked */ 2958 2959 /* 2960 * Calculate the page range within the PD. SIMPLE pmaps are 2961 * direct-mapped for the entire 2^64 address space. Normal pmaps 2962 * reflect the user and kernel address space which requires 2963 * canonicalization with regard to converting pd_pindex's back 2964 * into addresses. 2965 */ 2966 sva = (pd_pindex - NUPTE_TOTAL - NUPT_TOTAL) << PDPSHIFT; 2967 if ((pmap->pm_flags & PMAP_FLAG_SIMPLE) == 0 && 2968 (sva & PML4_SIGNMASK)) { 2969 sva |= PML4_SIGNMASK; 2970 } 2971 eva = sva + NBPDP; /* can overflow */ 2972 if (sva < info->sva) 2973 sva = info->sva; 2974 if (eva < info->sva || eva > info->eva) 2975 eva = info->eva; 2976 2977 /* 2978 * NOTE: kernel mappings do not track page table pages, only 2979 * terminal pages. 2980 * 2981 * NOTE: Locks must be ordered bottom-up. pte,pt,pd,pdp,pml4. 2982 * However, for the scan to be efficient we try to 2983 * cache items top-down. 2984 */ 2985 pd_pv = NULL; 2986 pt_pv = NULL; 2987 2988 for (; sva < eva; sva = va_next) { 2989 if (sva >= VM_MAX_USER_ADDRESS) { 2990 if (pt_pv) { 2991 pv_put(pt_pv); 2992 pt_pv = NULL; 2993 } 2994 goto kernel_skip; 2995 } 2996 2997 /* 2998 * PD cache (degenerate case if we skip). It is possible 2999 * for the PD to not exist due to races. This is ok.
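 *
 * (The cache keeps at most one pd_pv and one pt_pv across
 * iterations, swapping them out when sva crosses into a new
 * NBPDP (1GB) or NBPDR (2MB) aligned range respectively.)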
3000 */ 3001 if (pd_pv == NULL) { 3002 pd_pv = pv_get(pmap, pmap_pd_pindex(sva)); 3003 } else if (pd_pv->pv_pindex != pmap_pd_pindex(sva)) { 3004 pv_put(pd_pv); 3005 pd_pv = pv_get(pmap, pmap_pd_pindex(sva)); 3006 } 3007 if (pd_pv == NULL) { 3008 va_next = (sva + NBPDP) & ~PDPMASK; 3009 if (va_next < sva) 3010 va_next = eva; 3011 continue; 3012 } 3013 3014 /* 3015 * PT cache 3016 */ 3017 if (pt_pv == NULL) { 3018 if (pd_pv) { 3019 pv_put(pd_pv); 3020 pd_pv = NULL; 3021 } 3022 pt_pv = pv_get(pmap, pmap_pt_pindex(sva)); 3023 } else if (pt_pv->pv_pindex != pmap_pt_pindex(sva)) { 3024 if (pd_pv) { 3025 pv_put(pd_pv); 3026 pd_pv = NULL; 3027 } 3028 pv_put(pt_pv); 3029 pt_pv = pv_get(pmap, pmap_pt_pindex(sva)); 3030 } 3031 3032 /* 3033 * If pt_pv is NULL we either have a shared page table 3034 * page and must issue a callback specific to that case, 3035 * or there is no page table page. 3036 * 3037 * Either way we can skip the page table page. 3038 */ 3039 if (pt_pv == NULL) { 3040 /* 3041 * Possible unmanaged (shared from another pmap) 3042 * page table page. 3043 */ 3044 if (pd_pv == NULL) 3045 pd_pv = pv_get(pmap, pmap_pd_pindex(sva)); 3046 KKASSERT(pd_pv != NULL); 3047 ptep = pv_pte_lookup(pd_pv, pmap_pt_index(sva)); 3048 if (*ptep & PG_V) { 3049 info->func(pmap, info, NULL, pd_pv, 1, 3050 sva, ptep, info->arg); 3051 } 3052 3053 /* 3054 * Done, move to next page table page. 3055 */ 3056 va_next = (sva + NBPDR) & ~PDRMASK; 3057 if (va_next < sva) 3058 va_next = eva; 3059 continue; 3060 } 3061 3062 /* 3063 * From this point in the loop testing pt_pv for non-NULL 3064 * means we are in UVM, else if it is NULL we are in KVM. 3065 * 3066 * Limit our scan to either the end of the va represented 3067 * by the current page table page, or to the end of the 3068 * range being removed. 3069 */ 3070 kernel_skip: 3071 va_next = (sva + NBPDR) & ~PDRMASK; 3072 if (va_next < sva) 3073 va_next = eva; 3074 if (va_next > eva) 3075 va_next = eva; 3076 3077 /* 3078 * Scan the page table for pages. Some pages may not be 3079 * managed (might not have a pv_entry). 3080 * 3081 * There is no page table management for kernel pages so 3082 * pt_pv will be NULL in that case, but otherwise pt_pv 3083 * is non-NULL, locked, and referenced. 3084 */ 3085 3086 /* 3087 * At this point a non-NULL pt_pv means a UVA, and a NULL 3088 * pt_pv means a KVA. 3089 */ 3090 if (pt_pv) 3091 ptep = pv_pte_lookup(pt_pv, pmap_pte_index(sva)); 3092 else 3093 ptep = vtopte(sva); 3094 3095 while (sva < va_next) { 3096 /* 3097 * Acquire the related pte_pv, if any. If *ptep == 0 3098 * the related pte_pv should not exist, but if *ptep 3099 * is not zero the pte_pv may or may not exist (e.g. 3100 * will not exist for an unmanaged page). 3101 * 3102 * However a multitude of races are possible here. 3103 * 3104 * In addition, the (pt_pv, pte_pv) lock order is 3105 * backwards, so we have to be careful in acquiring 3106 * a properly locked pte_pv.
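 *
 * (Sketch of the recovery below: when pv_get_try() fails we drop
 * pd_pv and pt_pv, block on the contested pte_pv via pv_lock(),
 * release it, then re-acquire pt_pv and recompute ptep before
 * retrying, restoring the bottom-up lock order.)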
3107 */ 3108 if (pt_pv) { 3109 pte_pv = pv_get_try(pmap, pmap_pte_pindex(sva), 3110 &error); 3111 if (error) { 3112 if (pd_pv) { 3113 pv_put(pd_pv); 3114 pd_pv = NULL; 3115 } 3116 pv_put(pt_pv); /* must be non-NULL */ 3117 pt_pv = NULL; 3118 pv_lock(pte_pv); /* safe to block now */ 3119 pv_put(pte_pv); 3120 pte_pv = NULL; 3121 pt_pv = pv_get(pmap, 3122 pmap_pt_pindex(sva)); 3123 /* 3124 * pt_pv reloaded, need new ptep 3125 */ 3126 KKASSERT(pt_pv != NULL); 3127 ptep = pv_pte_lookup(pt_pv, 3128 pmap_pte_index(sva)); 3129 continue; 3130 } 3131 } else { 3132 pte_pv = pv_get(pmap, pmap_pte_pindex(sva)); 3133 } 3134 3135 /* 3136 * Ok, if *ptep == 0 we had better NOT have a pte_pv. 3137 */ 3138 if (*ptep == 0) { 3139 if (pte_pv) { 3140 kprintf("Unexpected non-NULL pte_pv " 3141 "%p pt_pv %p *ptep = %016lx\n", 3142 pte_pv, pt_pv, *ptep); 3143 panic("Unexpected non-NULL pte_pv"); 3144 } 3145 sva += PAGE_SIZE; 3146 ++ptep; 3147 continue; 3148 } 3149 3150 /* 3151 * Ready for the callback. The locked pte_pv (if any) 3152 * is consumed by the callback. pte_pv will exist if 3153 * the page is managed, and will not exist if it 3154 * isn't. 3155 */ 3156 if (pte_pv) { 3157 KASSERT((*ptep & (PG_MANAGED|PG_V)) == 3158 (PG_MANAGED|PG_V), 3159 ("bad *ptep %016lx sva %016lx " 3160 "pte_pv %p", 3161 *ptep, sva, pte_pv)); 3162 info->func(pmap, info, pte_pv, pt_pv, 0, 3163 sva, ptep, info->arg); 3164 } else { 3165 KASSERT((*ptep & (PG_MANAGED|PG_V)) == 3166 PG_V, 3167 ("bad *ptep %016lx sva %016lx " 3168 "pte_pv NULL", 3169 *ptep, sva)); 3170 info->func(pmap, info, NULL, pt_pv, 0, 3171 sva, ptep, info->arg); 3172 } 3173 pte_pv = NULL; 3174 sva += PAGE_SIZE; 3175 ++ptep; 3176 } 3177 lwkt_yield(); 3178 } 3179 if (pd_pv) { 3180 pv_put(pd_pv); 3181 pd_pv = NULL; 3182 } 3183 if (pt_pv) { 3184 pv_put(pt_pv); 3185 pt_pv = NULL; 3186 } 3187 lwkt_yield(); 3188 3189 /* 3190 * Relock before returning. 3191 */ 3192 spin_lock(&pmap->pm_spin); 3193 return (0); 3194 } 3195 3196 void 3197 pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) 3198 { 3199 struct pmap_scan_info info; 3200 3201 info.pmap = pmap; 3202 info.sva = sva; 3203 info.eva = eva; 3204 info.func = pmap_remove_callback; 3205 info.arg = NULL; 3206 info.doinval = 1; /* normal remove requires pmap inval */ 3207 pmap_scan(&info); 3208 } 3209 3210 static void 3211 pmap_remove_noinval(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) 3212 { 3213 struct pmap_scan_info info; 3214 3215 info.pmap = pmap; 3216 info.sva = sva; 3217 info.eva = eva; 3218 info.func = pmap_remove_callback; 3219 info.arg = NULL; 3220 info.doinval = 0; /* normal remove requires pmap inval */ 3221 pmap_scan(&info); 3222 } 3223 3224 static void 3225 pmap_remove_callback(pmap_t pmap, struct pmap_scan_info *info, 3226 pv_entry_t pte_pv, pv_entry_t pt_pv, int sharept, 3227 vm_offset_t va, pt_entry_t *ptep, void *arg __unused) 3228 { 3229 pt_entry_t pte; 3230 3231 if (pte_pv) { 3232 /* 3233 * This will also drop pt_pv's wire_count. Note that 3234 * terminal pages are not wired based on mmu presence. 3235 */ 3236 if (info->doinval) 3237 pmap_remove_pv_pte(pte_pv, pt_pv, &info->inval); 3238 else 3239 pmap_remove_pv_pte(pte_pv, pt_pv, NULL); 3240 pmap_remove_pv_page(pte_pv); 3241 pv_free(pte_pv); 3242 } else if (sharept == 0) { 3243 /* 3244 * Unmanaged page 3245 * 3246 * pt_pv's wire_count is still bumped by unmanaged pages 3247 * so we must decrement it manually. 
3248 */ 3249 if (info->doinval) 3250 pmap_inval_interlock(&info->inval, pmap, va); 3251 pte = pte_load_clear(ptep); 3252 if (info->doinval) 3253 pmap_inval_deinterlock(&info->inval, pmap); 3254 if (pte & PG_W) 3255 atomic_add_long(&pmap->pm_stats.wired_count, -1); 3256 atomic_add_long(&pmap->pm_stats.resident_count, -1); 3257 if (vm_page_unwire_quick(pt_pv->pv_m)) 3258 panic("pmap_remove: insufficient wirecount"); 3259 } else { 3260 /* 3261 * Unmanaged page table, pt_pv is actually the pd_pv 3262 * for our pmap (not the share object pmap). 3263 * 3264 * We have to unwire the target page table page and we 3265 * have to unwire our page directory page. 3266 */ 3267 if (info->doinval) 3268 pmap_inval_interlock(&info->inval, pmap, va); 3269 pte = pte_load_clear(ptep); 3270 if (info->doinval) 3271 pmap_inval_deinterlock(&info->inval, pmap); 3272 atomic_add_long(&pmap->pm_stats.resident_count, -1); 3273 if (vm_page_unwire_quick(PHYS_TO_VM_PAGE(pte & PG_FRAME))) 3274 panic("pmap_remove: shared pgtable1 bad wirecount"); 3275 if (vm_page_unwire_quick(pt_pv->pv_m)) 3276 panic("pmap_remove: shared pgtable2 bad wirecount"); 3277 } 3278 } 3279 3280 /* 3281 * Removes this physical page from all physical maps in which it resides. 3282 * Reflects back modify bits to the pager. 3283 * 3284 * This routine may not be called from an interrupt. 3285 */ 3286 static 3287 void 3288 pmap_remove_all(vm_page_t m) 3289 { 3290 struct pmap_inval_info info; 3291 pv_entry_t pv; 3292 3293 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 3294 return; 3295 3296 pmap_inval_init(&info); 3297 vm_page_spin_lock(m); 3298 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 3299 KKASSERT(pv->pv_m == m); 3300 if (pv_hold_try(pv)) { 3301 vm_page_spin_unlock(m); 3302 } else { 3303 vm_page_spin_unlock(m); 3304 pv_lock(pv); 3305 if (pv->pv_m != m) { 3306 pv_put(pv); 3307 vm_page_spin_lock(m); 3308 continue; 3309 } 3310 } 3311 /* 3312 * Holding no spinlocks, pv is locked. 3313 */ 3314 pmap_remove_pv_pte(pv, NULL, &info); 3315 pmap_remove_pv_page(pv); 3316 pv_free(pv); 3317 vm_page_spin_lock(m); 3318 } 3319 KKASSERT((m->flags & (PG_MAPPED|PG_WRITEABLE)) == 0); 3320 vm_page_spin_unlock(m); 3321 pmap_inval_done(&info); 3322 } 3323 3324 /* 3325 * Set the physical protection on the specified range of this map 3326 * as requested. This function is typically only used for debug watchpoints 3327 * and COW pages. 3328 * 3329 * This function may not be called from an interrupt if the map is 3330 * not the kernel_pmap. 3331 * 3332 * NOTE! For shared page table pages we just unmap the page. 3333 */ 3334 void 3335 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 3336 { 3337 struct pmap_scan_info info; 3338 /* JG review for NX */ 3339 3340 if (pmap == NULL) 3341 return; 3342 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 3343 pmap_remove(pmap, sva, eva); 3344 return; 3345 } 3346 if (prot & VM_PROT_WRITE) 3347 return; 3348 info.pmap = pmap; 3349 info.sva = sva; 3350 info.eva = eva; 3351 info.func = pmap_protect_callback; 3352 info.arg = &prot; 3353 info.doinval = 1; 3354 pmap_scan(&info); 3355 } 3356 3357 static 3358 void 3359 pmap_protect_callback(pmap_t pmap, struct pmap_scan_info *info, 3360 pv_entry_t pte_pv, pv_entry_t pt_pv, int sharept, 3361 vm_offset_t va, pt_entry_t *ptep, void *arg __unused) 3362 { 3363 pt_entry_t pbits; 3364 pt_entry_t cbits; 3365 pt_entry_t pte; 3366 vm_page_t m; 3367 3368 /* 3369 * XXX non-optimal. 
3370 */ 3371 pmap_inval_interlock(&info->inval, pmap, va); 3372 again: 3373 pbits = *ptep; 3374 cbits = pbits; 3375 if (pte_pv) { 3376 m = NULL; 3377 if (pbits & PG_A) { 3378 m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); 3379 KKASSERT(m == pte_pv->pv_m); 3380 vm_page_flag_set(m, PG_REFERENCED); 3381 cbits &= ~PG_A; 3382 } 3383 if (pbits & PG_M) { 3384 if (pmap_track_modified(pte_pv->pv_pindex)) { 3385 if (m == NULL) 3386 m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); 3387 vm_page_dirty(m); 3388 cbits &= ~PG_M; 3389 } 3390 } 3391 } else if (sharept) { 3392 /* 3393 * Unmanaged page table, pt_pv is actually the pd_pv 3394 * for our pmap (not the share object pmap). 3395 * 3396 * When asked to protect something in a shared page table 3397 * page we just unmap the page table page. We have to 3398 * invalidate the tlb in this situation. 3399 */ 3400 pte = pte_load_clear(ptep); 3401 pmap_inval_invltlb(&info->inval); 3402 if (vm_page_unwire_quick(PHYS_TO_VM_PAGE(pte & PG_FRAME))) 3403 panic("pmap_protect: pgtable1 pg bad wirecount"); 3404 if (vm_page_unwire_quick(pt_pv->pv_m)) 3405 panic("pmap_protect: pgtable2 pg bad wirecount"); 3406 ptep = NULL; 3407 } 3408 /* else unmanaged page, adjust bits, no wire changes */ 3409 3410 if (ptep) { 3411 cbits &= ~PG_RW; 3412 if (pbits != cbits && !atomic_cmpset_long(ptep, pbits, cbits)) { 3413 goto again; 3414 } 3415 } 3416 pmap_inval_deinterlock(&info->inval, pmap); 3417 if (pte_pv) 3418 pv_put(pte_pv); 3419 } 3420 3421 /* 3422 * Insert the vm_page (m) at the virtual address (va), replacing any prior 3423 * mapping at that address. Set protection and wiring as requested. 3424 * 3425 * If entry is non-NULL we check to see if the SEG_SIZE optimization is 3426 * possible. If it is we enter the page into the appropriate shared pmap 3427 * hanging off the related VM object instead of the passed pmap, then we 3428 * share the page table page from the VM object's pmap into the current pmap. 3429 * 3430 * NOTE: This routine MUST insert the page into the pmap now; it cannot 3431 * lazy-evaluate. 3432 */ 3433 void 3434 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 3435 boolean_t wired, vm_map_entry_t entry) 3436 { 3437 pmap_inval_info info; 3438 pv_entry_t pt_pv; /* page table */ 3439 pv_entry_t pte_pv; /* page table entry */ 3440 pt_entry_t *ptep; 3441 vm_paddr_t opa; 3442 pt_entry_t origpte, newpte; 3443 vm_paddr_t pa; 3444 3445 if (pmap == NULL) 3446 return; 3447 va = trunc_page(va); 3448 #ifdef PMAP_DIAGNOSTIC 3449 if (va >= KvaEnd) 3450 panic("pmap_enter: toobig"); 3451 if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS)) 3452 panic("pmap_enter: invalid to pmap_enter page table " 3453 "pages (va: 0x%lx)", va); 3454 #endif 3455 if (va < UPT_MAX_ADDRESS && pmap == &kernel_pmap) { 3456 kprintf("Warning: pmap_enter called on UVA with " 3457 "kernel_pmap\n"); 3458 #ifdef DDB 3459 db_print_backtrace(); 3460 #endif 3461 } 3462 if (va >= UPT_MAX_ADDRESS && pmap != &kernel_pmap) { 3463 kprintf("Warning: pmap_enter called on KVA without " 3464 "kernel_pmap\n"); 3465 #ifdef DDB 3466 db_print_backtrace(); 3467 #endif 3468 } 3469 3470 /* 3471 * Get locked PV entries for our new page table entry (pte_pv) 3472 * and for its parent page table (pt_pv). We need the parent 3473 * so we can resolve the location of the ptep. 3474 * 3475 * Only hardware MMU actions can modify the ptep out from 3476 * under us. 3477 * 3478 * if (m) is fictitious or unmanaged we do not create a managing
Any pre-existing page's management state must 3480 * match (avoiding code complexity). 3481 * 3482 * If the pmap is still being initialized we assume existing 3483 * page tables. 3484 * 3485 * Kernel mappings do not track page table pages (i.e. pt_pv). 3486 * pmap_allocpte() checks the 3487 */ 3488 if (pmap_initialized == FALSE) { 3489 pte_pv = NULL; 3490 pt_pv = NULL; 3491 ptep = vtopte(va); 3492 } else if (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) { /* XXX */ 3493 pte_pv = NULL; 3494 if (va >= VM_MAX_USER_ADDRESS) { 3495 pt_pv = NULL; 3496 ptep = vtopte(va); 3497 } else { 3498 pt_pv = pmap_allocpte_seg(pmap, pmap_pt_pindex(va), 3499 NULL, entry, va); 3500 ptep = pv_pte_lookup(pt_pv, pmap_pte_index(va)); 3501 } 3502 KKASSERT(*ptep == 0 || (*ptep & PG_MANAGED) == 0); 3503 } else { 3504 if (va >= VM_MAX_USER_ADDRESS) { 3505 /* 3506 * Kernel map, pv_entry-tracked. 3507 */ 3508 pt_pv = NULL; 3509 pte_pv = pmap_allocpte(pmap, pmap_pte_pindex(va), NULL); 3510 ptep = vtopte(va); 3511 } else { 3512 /* 3513 * User map 3514 */ 3515 pte_pv = pmap_allocpte_seg(pmap, pmap_pte_pindex(va), 3516 &pt_pv, entry, va); 3517 ptep = pv_pte_lookup(pt_pv, pmap_pte_index(va)); 3518 } 3519 KKASSERT(*ptep == 0 || (*ptep & PG_MANAGED)); 3520 } 3521 3522 pa = VM_PAGE_TO_PHYS(m); 3523 origpte = *ptep; 3524 opa = origpte & PG_FRAME; 3525 3526 newpte = (pt_entry_t)(pa | pte_prot(pmap, prot) | PG_V | PG_A); 3527 if (wired) 3528 newpte |= PG_W; 3529 if (va < VM_MAX_USER_ADDRESS) 3530 newpte |= PG_U; 3531 if (pte_pv) 3532 newpte |= PG_MANAGED; 3533 if (pmap == &kernel_pmap) 3534 newpte |= pgeflag; 3535 3536 /* 3537 * It is possible for multiple faults to occur in threaded 3538 * environments; the existing pte might be correct. 3539 */ 3540 if (((origpte ^ newpte) & ~(pt_entry_t)(PG_M|PG_A)) == 0) 3541 goto done; 3542 3543 if ((prot & VM_PROT_NOSYNC) == 0) 3544 pmap_inval_init(&info); 3545 3546 /* 3547 * Ok, either the address changed or the protection or wiring 3548 * changed. 3549 * 3550 * Clear the current entry, interlocking the removal. For managed 3551 * pte's this will also flush the modified state to the vm_page. 3552 * Atomic ops are mandatory in order to ensure that PG_M events are 3553 * not lost during any transition. 3554 */ 3555 if (opa) { 3556 if (pte_pv) { 3557 /* 3558 * pmap_remove_pv_pte() unwires pt_pv and assumes 3559 * we will free pte_pv, but since we are reusing 3560 * pte_pv we want to retain the wire count. 3561 * 3562 * pt_pv won't exist for a kernel page (managed or 3563 * otherwise). 3564 */ 3565 if (pt_pv) 3566 vm_page_wire_quick(pt_pv->pv_m); 3567 if (prot & VM_PROT_NOSYNC) 3568 pmap_remove_pv_pte(pte_pv, pt_pv, NULL); 3569 else 3570 pmap_remove_pv_pte(pte_pv, pt_pv, &info); 3571 if (pte_pv->pv_m) 3572 pmap_remove_pv_page(pte_pv); 3573 } else if (prot & VM_PROT_NOSYNC) { 3574 /* 3575 * Unmanaged page, NOSYNC (no mmu sync) requested. 3576 * 3577 * Leave wire count on PT page intact. 3578 */ 3579 (void)pte_load_clear(ptep); 3580 cpu_invlpg((void *)va); 3581 atomic_add_long(&pmap->pm_stats.resident_count, -1); 3582 } else { 3583 /* 3584 * Unmanaged page, normal enter. 3585 * 3586 * Leave wire count on PT page intact. 3587 */ 3588 pmap_inval_interlock(&info, pmap, va); 3589 (void)pte_load_clear(ptep); 3590 pmap_inval_deinterlock(&info, pmap); 3591 atomic_add_long(&pmap->pm_stats.resident_count, -1); 3592 } 3593 KKASSERT(*ptep == 0); 3594 } 3595 3596 if (pte_pv) { 3597 /* 3598 * Enter on the PV list if part of our managed memory. 3599 * Wiring of the PT page is already handled.
3600 */ 3601 KKASSERT(pte_pv->pv_m == NULL); 3602 vm_page_spin_lock(m); 3603 pte_pv->pv_m = m; 3604 TAILQ_INSERT_TAIL(&m->md.pv_list, pte_pv, pv_list); 3605 /* 3606 if (m->object) 3607 atomic_add_int(&m->object->agg_pv_list_count, 1); 3608 */ 3609 vm_page_flag_set(m, PG_MAPPED); 3610 vm_page_spin_unlock(m); 3611 } else if (pt_pv && opa == 0) { 3612 /* 3613 * We have to adjust the wire count on the PT page ourselves 3614 * for unmanaged entries. If opa was non-zero we retained 3615 * the existing wire count from the removal. 3616 */ 3617 vm_page_wire_quick(pt_pv->pv_m); 3618 } 3619 3620 /* 3621 * Kernel VMAs (pt_pv == NULL) require pmap invalidation interlocks. 3622 * 3623 * User VMAs do not because those will be zero->non-zero, so no 3624 * stale entries to worry about at this point. 3625 * 3626 * For KVM there appear to still be issues. Theoretically we 3627 * should be able to scrap the interlocks entirely but we 3628 * get crashes. 3629 */ 3630 if ((prot & VM_PROT_NOSYNC) == 0 && pt_pv == NULL) 3631 pmap_inval_interlock(&info, pmap, va); 3632 3633 /* 3634 * Set the pte 3635 */ 3636 *(volatile pt_entry_t *)ptep = newpte; 3637 3638 if ((prot & VM_PROT_NOSYNC) == 0 && pt_pv == NULL) 3639 pmap_inval_deinterlock(&info, pmap); 3640 else if (pt_pv == NULL) 3641 cpu_invlpg((void *)va); 3642 3643 if (wired) { 3644 if (pte_pv) { 3645 atomic_add_long(&pte_pv->pv_pmap->pm_stats.wired_count, 3646 1); 3647 } else { 3648 atomic_add_long(&pmap->pm_stats.wired_count, 1); 3649 } 3650 } 3651 if (newpte & PG_RW) 3652 vm_page_flag_set(m, PG_WRITEABLE); 3653 3654 /* 3655 * Unmanaged pages need manual resident_count tracking. 3656 */ 3657 if (pte_pv == NULL && pt_pv) 3658 atomic_add_long(&pt_pv->pv_pmap->pm_stats.resident_count, 1); 3659 3660 /* 3661 * Cleanup 3662 */ 3663 if ((prot & VM_PROT_NOSYNC) == 0 || pte_pv == NULL) 3664 pmap_inval_done(&info); 3665 done: 3666 KKASSERT((newpte & PG_MANAGED) == 0 || (m->flags & PG_MAPPED)); 3667 3668 /* 3669 * Cleanup the pv entry, allowing other accessors. 3670 */ 3671 if (pte_pv) 3672 pv_put(pte_pv); 3673 if (pt_pv) 3674 pv_put(pt_pv); 3675 } 3676 3677 /* 3678 * This code works like pmap_enter() but assumes VM_PROT_READ and not-wired. 3679 * This code also assumes that the pmap has no pre-existing entry for this 3680 * VA. 3681 * 3682 * This code currently may only be used on user pmaps, not kernel_pmap. 3683 */ 3684 void 3685 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m) 3686 { 3687 pmap_enter(pmap, va, m, VM_PROT_READ, FALSE, NULL); 3688 } 3689 3690 /* 3691 * Make a temporary mapping for a physical address. This is only intended 3692 * to be used for panic dumps. 3693 * 3694 * The caller is responsible for calling smp_invltlb(). 3695 */ 3696 void * 3697 pmap_kenter_temporary(vm_paddr_t pa, long i) 3698 { 3699 pmap_kenter_quick((vm_offset_t)crashdumpmap + (i * PAGE_SIZE), pa); 3700 return ((void *)crashdumpmap); 3701 } 3702 3703 #define MAX_INIT_PT (96) 3704 3705 /* 3706 * This routine preloads the ptes for a given object into the specified pmap. 3707 * This eliminates the blast of soft faults on process startup and 3708 * immediately after an mmap. 
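 *
 * (The checks below only preload vnode-backed objects, cap the work
 * at MAX_INIT_PT pages when MAP_PREFAULT_PARTIAL is set, and skip
 * fully segment-aligned ranges so the vm_fault path can instead give
 * pmap_enter() a chance to share a page table page.)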
3709 */ 3710 static int pmap_object_init_pt_callback(vm_page_t p, void *data); 3711 3712 void 3713 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_prot_t prot, 3714 vm_object_t object, vm_pindex_t pindex, 3715 vm_size_t size, int limit) 3716 { 3717 struct rb_vm_page_scan_info info; 3718 struct lwp *lp; 3719 vm_size_t psize; 3720 3721 /* 3722 * We can't preinit if read access isn't set or there is no pmap 3723 * or object. 3724 */ 3725 if ((prot & VM_PROT_READ) == 0 || pmap == NULL || object == NULL) 3726 return; 3727 3728 /* 3729 * We can't preinit if the pmap is not the current pmap 3730 */ 3731 lp = curthread->td_lwp; 3732 if (lp == NULL || pmap != vmspace_pmap(lp->lwp_vmspace)) 3733 return; 3734 3735 /* 3736 * Misc additional checks 3737 */ 3738 psize = x86_64_btop(size); 3739 3740 if ((object->type != OBJT_VNODE) || 3741 ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) && 3742 (object->resident_page_count > MAX_INIT_PT))) { 3743 return; 3744 } 3745 3746 if (pindex + psize > object->size) { 3747 if (object->size < pindex) 3748 return; 3749 psize = object->size - pindex; 3750 } 3751 3752 if (psize == 0) 3753 return; 3754 3755 /* 3756 * If everything is segment-aligned do not pre-init here. Instead 3757 * allow the normal vm_fault path to pass a segment hint to 3758 * pmap_enter() which will then use an object-referenced shared 3759 * page table page. 3760 */ 3761 if ((addr & SEG_MASK) == 0 && 3762 (ctob(psize) & SEG_MASK) == 0 && 3763 (ctob(pindex) & SEG_MASK) == 0) { 3764 return; 3765 } 3766 3767 /* 3768 * Use a red-black scan to traverse the requested range and load 3769 * any valid pages found into the pmap. 3770 * 3771 * We cannot safely scan the object's memq without holding the 3772 * object token. 3773 */ 3774 info.start_pindex = pindex; 3775 info.end_pindex = pindex + psize - 1; 3776 info.limit = limit; 3777 info.mpte = NULL; 3778 info.addr = addr; 3779 info.pmap = pmap; 3780 3781 vm_object_hold_shared(object); 3782 vm_page_rb_tree_RB_SCAN(&object->rb_memq, rb_vm_page_scancmp, 3783 pmap_object_init_pt_callback, &info); 3784 vm_object_drop(object); 3785 } 3786 3787 static 3788 int 3789 pmap_object_init_pt_callback(vm_page_t p, void *data) 3790 { 3791 struct rb_vm_page_scan_info *info = data; 3792 vm_pindex_t rel_index; 3793 3794 /* 3795 * don't allow an madvise to blow away our really 3796 * free pages allocating pv entries. 3797 */ 3798 if ((info->limit & MAP_PREFAULT_MADVISE) && 3799 vmstats.v_free_count < vmstats.v_free_reserved) { 3800 return(-1); 3801 } 3802 3803 /* 3804 * Ignore list markers and ignore pages we cannot instantly 3805 * busy (while holding the object token). 3806 */ 3807 if (p->flags & PG_MARKER) 3808 return 0; 3809 if (vm_page_busy_try(p, TRUE)) 3810 return 0; 3811 if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && 3812 (p->flags & PG_FICTITIOUS) == 0) { 3813 if ((p->queue - p->pc) == PQ_CACHE) 3814 vm_page_deactivate(p); 3815 rel_index = p->pindex - info->start_pindex; 3816 pmap_enter_quick(info->pmap, 3817 info->addr + x86_64_ptob(rel_index), p); 3818 } 3819 vm_page_wakeup(p); 3820 lwkt_yield(); 3821 return(0); 3822 } 3823 3824 /* 3825 * Return TRUE if the pmap is in shape to trivially pre-fault the specified 3826 * address. 3827 * 3828 * Returns FALSE if it would be non-trivial or if a pte is already loaded 3829 * into the slot. 3830 * 3831 * XXX This is safe only because page table pages are not freed. 
3832 */ 3833 int 3834 pmap_prefault_ok(pmap_t pmap, vm_offset_t addr) 3835 { 3836 pt_entry_t *pte; 3837 3838 /*spin_lock(&pmap->pm_spin);*/ 3839 if ((pte = pmap_pte(pmap, addr)) != NULL) { 3840 if (*pte & PG_V) { 3841 /*spin_unlock(&pmap->pm_spin);*/ 3842 return FALSE; 3843 } 3844 } 3845 /*spin_unlock(&pmap->pm_spin);*/ 3846 return TRUE; 3847 } 3848 3849 /* 3850 * Change the wiring attribute for a pmap/va pair. The mapping must already 3851 * exist in the pmap. The mapping may or may not be managed. 3852 */ 3853 void 3854 pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired, 3855 vm_map_entry_t entry) 3856 { 3857 pt_entry_t *ptep; 3858 pv_entry_t pv; 3859 3860 if (pmap == NULL) 3861 return; 3862 lwkt_gettoken(&pmap->pm_token); 3863 pv = pmap_allocpte_seg(pmap, pmap_pt_pindex(va), NULL, entry, va); 3864 ptep = pv_pte_lookup(pv, pmap_pte_index(va)); 3865 3866 if (wired && !pmap_pte_w(ptep)) 3867 atomic_add_long(&pv->pv_pmap->pm_stats.wired_count, 1); 3868 else if (!wired && pmap_pte_w(ptep)) 3869 atomic_add_long(&pv->pv_pmap->pm_stats.wired_count, -1); 3870 3871 /* 3872 * Wiring is not a hardware characteristic so there is no need to 3873 * invalidate TLB. However, in an SMP environment we must use 3874 * a locked bus cycle to update the pte (if we are not using 3875 * the pmap_inval_*() API that is)... it's ok to do this for simple 3876 * wiring changes. 3877 */ 3878 if (wired) 3879 atomic_set_long(ptep, PG_W); 3880 else 3881 atomic_clear_long(ptep, PG_W); 3882 pv_put(pv); 3883 lwkt_reltoken(&pmap->pm_token); 3884 } 3885 3886 3887 3888 /* 3889 * Copy the range specified by src_addr/len from the source map to 3890 * the range dst_addr/len in the destination map. 3891 * 3892 * This routine is only advisory and need not do anything. 3893 */ 3894 void 3895 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, 3896 vm_size_t len, vm_offset_t src_addr) 3897 { 3898 } 3899 3900 /* 3901 * pmap_zero_page: 3902 * 3903 * Zero the specified physical page. 3904 * 3905 * This function may be called from an interrupt and no locking is 3906 * required. 3907 */ 3908 void 3909 pmap_zero_page(vm_paddr_t phys) 3910 { 3911 vm_offset_t va = PHYS_TO_DMAP(phys); 3912 3913 pagezero((void *)va); 3914 } 3915 3916 /* 3917 * pmap_page_assertzero: 3918 * 3919 * Assert that a page is empty, panic if it isn't. 3920 */ 3921 void 3922 pmap_page_assertzero(vm_paddr_t phys) 3923 { 3924 vm_offset_t va = PHYS_TO_DMAP(phys); 3925 size_t i; 3926 3927 for (i = 0; i < PAGE_SIZE; i += sizeof(long)) { 3928 if (*(long *)((char *)va + i) != 0) { 3929 panic("pmap_page_assertzero() @ %p not zero!", 3930 (void *)(intptr_t)va); 3931 } 3932 } 3933 } 3934 3935 /* 3936 * pmap_zero_page_area: 3937 * 3938 * Zero part of a physical page by mapping it into memory and clearing 3939 * its contents with bzero. 3940 * 3941 * off and size may not cover an area beyond a single hardware page. 3942 */ 3943 void 3944 pmap_zero_page_area(vm_paddr_t phys, int off, int size) 3945 { 3946 vm_offset_t virt = PHYS_TO_DMAP(phys); 3947 3948 bzero((char *)virt + off, size); 3949 } 3950 3951 /* 3952 * pmap_copy_page: 3953 * 3954 * Copy the physical page from the source PA to the target PA. 3955 * This function may be called from an interrupt. No locking 3956 * is required.
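 *
 * (Both pages are accessed through the kernel's direct map via
 * PHYS_TO_DMAP(), so no temporary mappings are required;
 * pmap_copy_page_frag() below works the same way on partial pages.)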
3957 */ 3958 void 3959 pmap_copy_page(vm_paddr_t src, vm_paddr_t dst) 3960 { 3961 vm_offset_t src_virt, dst_virt; 3962 3963 src_virt = PHYS_TO_DMAP(src); 3964 dst_virt = PHYS_TO_DMAP(dst); 3965 bcopy((void *)src_virt, (void *)dst_virt, PAGE_SIZE); 3966 } 3967 3968 /* 3969 * pmap_copy_page_frag: 3970 * 3971 * Copy the physical page from the source PA to the target PA. 3972 * This function may be called from an interrupt. No locking 3973 * is required. 3974 */ 3975 void 3976 pmap_copy_page_frag(vm_paddr_t src, vm_paddr_t dst, size_t bytes) 3977 { 3978 vm_offset_t src_virt, dst_virt; 3979 3980 src_virt = PHYS_TO_DMAP(src); 3981 dst_virt = PHYS_TO_DMAP(dst); 3982 3983 bcopy((char *)src_virt + (src & PAGE_MASK), 3984 (char *)dst_virt + (dst & PAGE_MASK), 3985 bytes); 3986 } 3987 3988 /* 3989 * Returns true if the pmap's pv is one of the first 16 pvs linked to from 3990 * this page. This count may be changed upwards or downwards in the future; 3991 * it is only necessary that true be returned for a small subset of pmaps 3992 * for proper page aging. 3993 */ 3994 boolean_t 3995 pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 3996 { 3997 pv_entry_t pv; 3998 int loops = 0; 3999 4000 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 4001 return FALSE; 4002 4003 vm_page_spin_lock(m); 4004 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 4005 if (pv->pv_pmap == pmap) { 4006 vm_page_spin_unlock(m); 4007 return TRUE; 4008 } 4009 loops++; 4010 if (loops >= 16) 4011 break; 4012 } 4013 vm_page_spin_unlock(m); 4014 return (FALSE); 4015 } 4016 4017 /* 4018 * Remove all pages from the specified address space; this aids process 4019 * exit speeds. Also, this code may be special cased for the current 4020 * process only. 4021 */ 4022 void 4023 pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 4024 { 4025 pmap_remove_noinval(pmap, sva, eva); 4026 cpu_invltlb(); 4027 } 4028 4029 /* 4030 * pmap_testbit tests bits in pte's. Note that the testbit/clearbit 4031 * routines are inline, and a lot of things compile-time evaluate. 4032 */ 4033 static 4034 boolean_t 4035 pmap_testbit(vm_page_t m, int bit) 4036 { 4037 pv_entry_t pv; 4038 pt_entry_t *pte; 4039 4040 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 4041 return FALSE; 4042 4043 if (TAILQ_FIRST(&m->md.pv_list) == NULL) 4044 return FALSE; 4045 vm_page_spin_lock(m); 4046 if (TAILQ_FIRST(&m->md.pv_list) == NULL) { 4047 vm_page_spin_unlock(m); 4048 return FALSE; 4049 } 4050 4051 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 4052 /* 4053 * if the bit being tested is the modified bit, then 4054 * mark clean_map and ptes as never 4055 * modified. 4056 */ 4057 if (bit & (PG_A|PG_M)) { 4058 if (!pmap_track_modified(pv->pv_pindex)) 4059 continue; 4060 } 4061 4062 #if defined(PMAP_DIAGNOSTIC) 4063 if (pv->pv_pmap == NULL) { 4064 kprintf("Null pmap (tb) at pindex: %"PRIu64"\n", 4065 pv->pv_pindex); 4066 continue; 4067 } 4068 #endif 4069 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_pindex << PAGE_SHIFT); 4070 if (*pte & bit) { 4071 vm_page_spin_unlock(m); 4072 return TRUE; 4073 } 4074 } 4075 vm_page_spin_unlock(m); 4076 return (FALSE); 4077 } 4078 4079 /* 4080 * This routine is used to modify bits in ptes. Only one bit should be 4081 * specified. PG_RW requires special handling.
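 *
 * (For example, pmap_clear_modify() below is simply
 * pmap_clearbit(m, PG_M), and pmap_page_protect() uses
 * pmap_clearbit(m, PG_RW) to write-protect a page.)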
/*
 * This routine is used to modify bits in ptes.  Only one bit should be
 * specified.  PG_RW requires special handling.
 *
 * Caller must NOT hold any spin locks.
 */
static __inline
void
pmap_clearbit(vm_page_t m, int bit)
{
	struct pmap_inval_info info;
	pv_entry_t pv;
	pt_entry_t *pte;
	pt_entry_t pbits;
	pmap_t save_pmap;

	if (bit == PG_RW)
		vm_page_flag_clear(m, PG_WRITEABLE);
	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
		return;
	}

	/*
	 * PG_M or PG_A case
	 *
	 * Loop over all current mappings, setting/clearing as appropriate.
	 * If setting RO, do we need to clear the VAC?
	 *
	 * NOTE: When clearing PG_M we could also (not implemented) drop
	 *	 through to the PG_RW code and clear PG_RW too, forcing
	 *	 a fault on write to redetect PG_M for virtual kernels, but
	 *	 it isn't necessary since virtual kernels invalidate the
	 *	 pte when they clear the VPTE_M bit in their virtual page
	 *	 tables.
	 *
	 * NOTE: Does not re-dirty the page when clearing only PG_M.
	 */
	if ((bit & PG_RW) == 0) {
		vm_page_spin_lock(m);
		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
#if defined(PMAP_DIAGNOSTIC)
			if (pv->pv_pmap == NULL) {
				kprintf("Null pmap (cb) at pindex: %"PRIu64"\n",
				    pv->pv_pindex);
				continue;
			}
#endif
			pte = pmap_pte_quick(pv->pv_pmap,
					     pv->pv_pindex << PAGE_SHIFT);
			pbits = *pte;
			if (pbits & bit)
				atomic_clear_long(pte, bit);
		}
		vm_page_spin_unlock(m);
		return;
	}

	/*
	 * Clear PG_RW.  Also clears PG_M and marks the page dirty if PG_M
	 * was set.
	 */
	pmap_inval_init(&info);

restart:
	vm_page_spin_lock(m);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		/*
		 * Don't write-protect pager mappings.
		 */
		if (!pmap_track_modified(pv->pv_pindex))
			continue;

#if defined(PMAP_DIAGNOSTIC)
		if (pv->pv_pmap == NULL) {
			kprintf("Null pmap (cb) at pindex: %"PRIu64"\n",
			    pv->pv_pindex);
			continue;
		}
#endif
		/*
		 * Skip pages which do not have PG_RW set.
		 */
		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_pindex << PAGE_SHIFT);
		if ((*pte & PG_RW) == 0)
			continue;

		/*
		 * Lock the PV
		 */
		if (pv_hold_try(pv) == 0) {
			vm_page_spin_unlock(m);
			pv_lock(pv);	/* held, now do a blocking lock */
			pv_put(pv);	/* and release */
			goto restart;	/* anything could have happened */
		}

		save_pmap = pv->pv_pmap;
		vm_page_spin_unlock(m);
		pmap_inval_interlock(&info, save_pmap,
				     (vm_offset_t)pv->pv_pindex << PAGE_SHIFT);
		KKASSERT(pv->pv_pmap == save_pmap);
		for (;;) {
			pbits = *pte;
			cpu_ccfence();
			if (atomic_cmpset_long(pte, pbits,
					       pbits & ~(PG_RW|PG_M))) {
				break;
			}
		}
		pmap_inval_deinterlock(&info, save_pmap);
		vm_page_spin_lock(m);

		/*
		 * If PG_M was found to be set while we were clearing PG_RW
		 * we also clear PG_M (done above) and mark the page dirty.
		 * Callers expect this behavior.
		 */
		if (pbits & PG_M)
			vm_page_dirty(m);
		pv_put(pv);
	}
	vm_page_spin_unlock(m);
	pmap_inval_done(&info);
}
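
#if 0
/*
 * Illustrative sketch only: the lock-free pte update pattern used in
 * pmap_clearbit() above.  The pte is re-read on each pass, cpu_ccfence()
 * prevents the compiler from caching the read, and atomic_cmpset_long()
 * retries until no other cpu raced the update.  "bits" is whatever PG_*
 * bits the caller wants cleared.
 */
static void
example_pte_clear_bits(pt_entry_t *pte, pt_entry_t bits)
{
	pt_entry_t pbits;

	for (;;) {
		pbits = *pte;
		cpu_ccfence();
		if (atomic_cmpset_long(pte, pbits, pbits & ~bits))
			break;
	}
}
#endif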
/*
 * Lower the permission for all mappings to a given page.
 *
 * Page must be busied by caller.
 */
void
pmap_page_protect(vm_page_t m, vm_prot_t prot)
{
	/* JG NX support? */
	if ((prot & VM_PROT_WRITE) == 0) {
		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
			/*
			 * NOTE: pmap_clearbit(.., PG_RW) also clears
			 *	 the PG_WRITEABLE flag in (m).
			 */
			pmap_clearbit(m, PG_RW);
		} else {
			pmap_remove_all(m);
		}
	}
}

vm_paddr_t
pmap_phys_address(vm_pindex_t ppn)
{
	return (x86_64_ptob(ppn));
}

/*
 * Return a count of reference bits for a page, clearing those bits.
 * It is not necessary for every reference bit to be cleared, but it
 * is necessary that 0 only be returned when there are truly no
 * reference bits set.
 *
 * XXX: The exact number of bits to check and clear is a matter that
 * should be tested and standardized at some point in the future for
 * optimal aging of shared pages.
 *
 * This routine may not block.
 */
int
pmap_ts_referenced(vm_page_t m)
{
	pv_entry_t pv;
	pt_entry_t *pte;
	int rtval = 0;

	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
		return (rtval);

	vm_page_spin_lock(m);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		if (!pmap_track_modified(pv->pv_pindex))
			continue;
		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_pindex << PAGE_SHIFT);
		if (pte && (*pte & PG_A)) {
			atomic_clear_long(pte, PG_A);
			rtval++;
			if (rtval > 4)
				break;
		}
	}
	vm_page_spin_unlock(m);
	return (rtval);
}

/*
 * pmap_is_modified:
 *
 *	Return whether or not the specified physical page was modified
 *	in any physical maps.
 */
boolean_t
pmap_is_modified(vm_page_t m)
{
	boolean_t res;

	res = pmap_testbit(m, PG_M);
	return (res);
}

/*
 * Clear the modify bit on the specified physical page.
 */
void
pmap_clear_modify(vm_page_t m)
{
	pmap_clearbit(m, PG_M);
}

/*
 * pmap_clear_reference:
 *
 *	Clear the reference bit on the specified physical page.
 */
void
pmap_clear_reference(vm_page_t m)
{
	pmap_clearbit(m, PG_A);
}

/*
 * Miscellaneous support routines follow
 */

static
void
i386_protection_init(void)
{
	int *kp, prot;

	/* JG NX support may go here; no VM_PROT_EXECUTE ==> set NX bit */
	kp = protection_codes;
	for (prot = 0; prot < 8; prot++) {
		switch (prot) {
		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
			/*
			 * Read access is also 0.  There isn't any execute
			 * bit, so just make it readable.
			 */
		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
			*kp++ = 0;
			break;
		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
			*kp++ = PG_RW;
			break;
		}
	}
}
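
#if 0
/*
 * Illustrative sketch only: how the protection_codes[] table built
 * above is consumed.  A vm_prot_t masked down to its READ/WRITE/EXECUTE
 * bits indexes the table directly, yielding the PG_* bits to OR into a
 * pte (the real consumers live elsewhere in this file).
 */
static int
example_prot_to_pte_bits(vm_prot_t prot)
{
	return (protection_codes[prot & (VM_PROT_READ | VM_PROT_WRITE |
					 VM_PROT_EXECUTE)]);
}
#endif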
/*
 * Map a set of physical memory pages into the kernel virtual
 * address space.  Return a pointer to where it is mapped.  This
 * routine is intended to be used for mapping device memory,
 * NOT real memory.
 *
 * NOTE: We can't use pgeflag unless we invalidate the pages one at
 *	 a time.
 */
void *
pmap_mapdev(vm_paddr_t pa, vm_size_t size)
{
	vm_offset_t va, tmpva, offset;
	pt_entry_t *pte;

	offset = pa & PAGE_MASK;
	size = roundup(offset + size, PAGE_SIZE);

	va = kmem_alloc_nofault(&kernel_map, size, PAGE_SIZE);
	if (va == 0)
		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");

	pa = pa & ~PAGE_MASK;
	for (tmpva = va; size > 0;) {
		pte = vtopte(tmpva);
		*pte = pa | PG_RW | PG_V; /* | pgeflag; */
		size -= PAGE_SIZE;
		tmpva += PAGE_SIZE;
		pa += PAGE_SIZE;
	}
	cpu_invltlb();
	smp_invltlb();

	return ((void *)(va + offset));
}

void *
pmap_mapdev_uncacheable(vm_paddr_t pa, vm_size_t size)
{
	vm_offset_t va, tmpva, offset;
	pt_entry_t *pte;

	offset = pa & PAGE_MASK;
	size = roundup(offset + size, PAGE_SIZE);

	va = kmem_alloc_nofault(&kernel_map, size, PAGE_SIZE);
	if (va == 0) {
		panic("pmap_mapdev_uncacheable: "
		      "Couldn't alloc kernel virtual memory");
	}

	pa = pa & ~PAGE_MASK;
	for (tmpva = va; size > 0;) {
		pte = vtopte(tmpva);
		*pte = pa | PG_RW | PG_V | PG_N; /* | pgeflag; */
		size -= PAGE_SIZE;
		tmpva += PAGE_SIZE;
		pa += PAGE_SIZE;
	}
	cpu_invltlb();
	smp_invltlb();

	return ((void *)(va + offset));
}

void
pmap_unmapdev(vm_offset_t va, vm_size_t size)
{
	vm_offset_t base, offset;

	base = va & ~PAGE_MASK;
	offset = va & PAGE_MASK;
	size = roundup(offset + size, PAGE_SIZE);
	pmap_qremove(va, size >> PAGE_SHIFT);
	kmem_free(&kernel_map, base, size);
}

/*
 * Perform the pmap work for mincore().
 */
int
pmap_mincore(pmap_t pmap, vm_offset_t addr)
{
	pt_entry_t *ptep, pte;
	vm_page_t m;
	int val = 0;

	lwkt_gettoken(&pmap->pm_token);
	ptep = pmap_pte(pmap, addr);

	if (ptep && (pte = *ptep) != 0) {
		vm_offset_t pa;

		val = MINCORE_INCORE;
		if ((pte & PG_MANAGED) == 0)
			goto done;

		pa = pte & PG_FRAME;

		m = PHYS_TO_VM_PAGE(pa);

		/*
		 * Modified by us
		 */
		if (pte & PG_M)
			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
		/*
		 * Modified by someone
		 */
		else if (m->dirty || pmap_is_modified(m))
			val |= MINCORE_MODIFIED_OTHER;

		/*
		 * Referenced by us
		 */
		if (pte & PG_A)
			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
		/*
		 * Referenced by someone
		 */
		else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) {
			val |= MINCORE_REFERENCED_OTHER;
			vm_page_flag_set(m, PG_REFERENCED);
		}
	}
done:
	lwkt_reltoken(&pmap->pm_token);

	return val;
}
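
#if 0
/*
 * Illustrative sketch only: typical driver-style use of the mapdev
 * helpers above.  EXAMPLE_REG_BASE and EXAMPLE_REG_SIZE are
 * hypothetical device register constants, not real definitions.
 */
#define EXAMPLE_REG_BASE	0xfed00000UL	/* hypothetical */
#define EXAMPLE_REG_SIZE	0x1000UL	/* hypothetical */

static void
example_mapdev_usage(void)
{
	volatile uint32_t *regs;

	regs = pmap_mapdev_uncacheable(EXAMPLE_REG_BASE, EXAMPLE_REG_SIZE);
	(void)regs[0];		/* ... access device registers ... */
	pmap_unmapdev((vm_offset_t)regs, EXAMPLE_REG_SIZE);
}
#endif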
/*
 * Replace p->p_vmspace with a new one.  If adjrefs is non-zero the new
 * vmspace will be ref'd and the old one will be deref'd.
 *
 * The vmspace for all lwps associated with the process will be adjusted
 * and cr3 will be reloaded if any lwp is the current lwp.
 *
 * The process must hold the vmspace->vm_map.token for oldvm and newvm.
 */
void
pmap_replacevm(struct proc *p, struct vmspace *newvm, int adjrefs)
{
	struct vmspace *oldvm;
	struct lwp *lp;

	oldvm = p->p_vmspace;
	if (oldvm != newvm) {
		if (adjrefs)
			sysref_get(&newvm->vm_sysref);
		p->p_vmspace = newvm;
		KKASSERT(p->p_nthreads == 1);
		lp = RB_ROOT(&p->p_lwp_tree);
		pmap_setlwpvm(lp, newvm);
		if (adjrefs)
			sysref_put(&oldvm->vm_sysref);
	}
}

/*
 * Set the vmspace for a LWP.  The vmspace is almost universally set the
 * same as the process vmspace, but virtual kernels need to swap out contexts
 * on a per-lwp basis.
 *
 * Caller does not necessarily hold any vmspace tokens.  Caller must control
 * the lwp (typically by being in the context of the lwp).  We use a critical
 * section to protect against statclock and hardclock (statistics collection).
 */
void
pmap_setlwpvm(struct lwp *lp, struct vmspace *newvm)
{
	struct vmspace *oldvm;
	struct pmap *pmap;

	oldvm = lp->lwp_vmspace;

	if (oldvm != newvm) {
		crit_enter();
		lp->lwp_vmspace = newvm;
		if (curthread->td_lwp == lp) {
			pmap = vmspace_pmap(newvm);
			atomic_set_cpumask(&pmap->pm_active,
					   mycpu->gd_cpumask);
			if (pmap->pm_active & CPUMASK_LOCK)
				pmap_interlock_wait(newvm);
#if defined(SWTCH_OPTIM_STATS)
			tlb_flush_count++;
#endif
			curthread->td_pcb->pcb_cr3 = vtophys(pmap->pm_pml4);
			curthread->td_pcb->pcb_cr3 |= PG_RW | PG_U | PG_V;
			load_cr3(curthread->td_pcb->pcb_cr3);
			pmap = vmspace_pmap(oldvm);
			atomic_clear_cpumask(&pmap->pm_active,
					     mycpu->gd_cpumask);
		}
		crit_exit();
	}
}

/*
 * Called when switching to a locked pmap, used to interlock against pmaps
 * undergoing modifications to prevent us from activating the MMU for the
 * target pmap until all such modifications have completed.  We have to do
 * this because the thread making the modifications has already set up its
 * SMP synchronization mask.
 *
 * This function cannot sleep!
 *
 * No requirements.
 */
void
pmap_interlock_wait(struct vmspace *vm)
{
	struct pmap *pmap = &vm->vm_pmap;

	if (pmap->pm_active & CPUMASK_LOCK) {
		crit_enter();
		KKASSERT(curthread->td_critcount >= 2);
		DEBUG_PUSH_INFO("pmap_interlock_wait");
		while (pmap->pm_active & CPUMASK_LOCK) {
			cpu_ccfence();
			lwkt_process_ipiq();
		}
		DEBUG_POP_INFO();
		crit_exit();
	}
}

/*
 * Return an address hint for device mappings large enough to benefit
 * from 2MB (NBPDR) page-directory mappings, by rounding the hint up to
 * the next NBPDR boundary.
 */
vm_offset_t
pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
{
	if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) {
		return addr;
	}

	addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
	return addr;
}

/*
 * Used by kmalloc/kfree, page already exists at va.
 */
vm_page_t
pmap_kvtom(vm_offset_t va)
{
	return(PHYS_TO_VM_PAGE(*vtopte(va) & PG_FRAME));
}

/*
 * Initialize machine-specific shared page directory support.  This
 * is executed when a VM object is created.
 */
void
pmap_object_init(vm_object_t object)
{
	object->md.pmap_rw = NULL;
	object->md.pmap_ro = NULL;
}
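
#if 0
/*
 * Illustrative sketch only: the power-of-two round-up used by
 * pmap_addr_hint() above.  Adding (NBPDR - 1) and masking with
 * ~(NBPDR - 1) rounds any address up to the next 2MB boundary,
 * e.g. 0x200001 -> 0x400000, while an already-aligned 0x400000
 * stays put.
 */
static vm_offset_t
example_round_up_nbpdr(vm_offset_t addr)
{
	return ((addr + (NBPDR - 1)) & ~(NBPDR - 1));
}
#endif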
/*
 * Clean up machine-specific shared page directory support.  This
 * is executed when a VM object is destroyed.
 */
void
pmap_object_free(vm_object_t object)
{
	pmap_t pmap;

	if ((pmap = object->md.pmap_rw) != NULL) {
		object->md.pmap_rw = NULL;
		pmap_remove_noinval(pmap,
				    VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
		pmap->pm_active = 0;
		pmap_release(pmap);
		pmap_puninit(pmap);
		kfree(pmap, M_OBJPMAP);
	}
	if ((pmap = object->md.pmap_ro) != NULL) {
		object->md.pmap_ro = NULL;
		pmap_remove_noinval(pmap,
				    VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
		pmap->pm_active = 0;
		pmap_release(pmap);
		pmap_puninit(pmap);
		kfree(pmap, M_OBJPMAP);
	}
}