1 /* $OpenBSD: pmap.c,v 1.62 2011/04/15 15:16:57 chl Exp $ */ 2 /* $NetBSD: pmap.c,v 1.3 2003/05/08 18:13:13 thorpej Exp $ */ 3 4 /* 5 * 6 * Copyright (c) 1997 Charles D. Cranor and Washington University. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by Charles D. Cranor and 20 * Washington University. 21 * 4. The name of the author may not be used to endorse or promote products 22 * derived from this software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 29 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 30 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 31 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 32 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 33 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /* 37 * Copyright 2001 (c) Wasabi Systems, Inc. 38 * All rights reserved. 39 * 40 * Written by Frank van der Linden for Wasabi Systems, Inc. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. All advertising materials mentioning features or use of this software 51 * must display the following acknowledgement: 52 * This product includes software developed for the NetBSD Project by 53 * Wasabi Systems, Inc. 54 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 55 * or promote products derived from this software without specific prior 56 * written permission. 57 * 58 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 60 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 61 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 62 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 63 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 64 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 65 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 66 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 67 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 68 * POSSIBILITY OF SUCH DAMAGE. 69 */ 70 71 /* 72 * This is the i386 pmap modified and generalized to support x86-64 73 * as well. The idea is to hide the upper N levels of the page tables 74 * inside pmap_get_ptp, pmap_free_ptp and pmap_growkernel. The rest 75 * is mostly untouched, except that it uses some more generalized 76 * macros and interfaces. 77 * 78 * This pmap has been tested on the i386 as well, and it can be easily 79 * adapted to PAE. 80 * 81 * fvdl@wasabisystems.com 18-Jun-2001 82 */ 83 84 /* 85 * pmap.c: i386 pmap module rewrite 86 * Chuck Cranor <chuck@ccrc.wustl.edu> 87 * 11-Aug-97 88 * 89 * history of this pmap module: in addition to my own input, i used 90 * the following references for this rewrite of the i386 pmap: 91 * 92 * [1] the NetBSD i386 pmap. this pmap appears to be based on the 93 * BSD hp300 pmap done by Mike Hibler at University of Utah. 94 * it was then ported to the i386 by William Jolitz of UUNET 95 * Technologies, Inc. Then Charles M. Hannum of the NetBSD 96 * project fixed some bugs and provided some speed ups. 97 * 98 * [2] the FreeBSD i386 pmap. this pmap seems to be the 99 * Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson 100 * and David Greenman. 101 * 102 * [3] the Mach pmap. this pmap, from CMU, seems to have migrated 103 * between several processors. the VAX version was done by 104 * Avadis Tevanian, Jr., and Michael Wayne Young. the i386 105 * version was done by Lance Berc, Mike Kupfer, Bob Baron, 106 * David Golub, and Richard Draves. the alpha version was 107 * done by Alessandro Forin (CMU/Mach) and Chris Demetriou 108 * (NetBSD/alpha). 109 */ 110 111 #include <sys/param.h> 112 #include <sys/systm.h> 113 #include <sys/proc.h> 114 #include <sys/malloc.h> 115 #include <sys/pool.h> 116 #include <sys/user.h> 117 #include <sys/kernel.h> 118 #include <sys/mutex.h> 119 #include <sys/sched.h> 120 121 #include <uvm/uvm.h> 122 123 #include <machine/atomic.h> 124 #include <machine/lock.h> 125 #include <machine/cpu.h> 126 #include <machine/specialreg.h> 127 #include <machine/gdt.h> 128 129 #include <dev/isa/isareg.h> 130 #include <machine/isa_machdep.h> 131 132 /* 133 * general info: 134 * 135 * - for an explanation of how the i386 MMU hardware works see 136 * the comments in <machine/pte.h>. 137 * 138 * - for an explanation of the general memory structure used by 139 * this pmap (including the recursive mapping), see the comments 140 * in <machine/pmap.h>. 141 * 142 * this file contains the code for the "pmap module." the module's 143 * job is to manage the hardware's virtual to physical address mappings. 144 * note that there are two levels of mapping in the VM system: 145 * 146 * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's 147 * to map ranges of virtual address space to objects/files. for 148 * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only 149 * to the file /bin/ls starting at offset zero." note that 150 * the upper layer mapping is not concerned with how individual 151 * vm_pages are mapped. 152 * 153 * [2] the lower layer of the VM system (the pmap) maintains the mappings 154 * from virtual addresses. it is concerned with which vm_page is 155 * mapped where. for example, when you run /bin/ls and start 156 * at page 0x1000 the fault routine may lookup the correct page 157 * of the /bin/ls file and then ask the pmap layer to establish 158 * a mapping for it. 159 * 160 * note that information in the lower layer of the VM system can be 161 * thrown away since it can easily be reconstructed from the info 162 * in the upper layer. 163 * 164 * data structures we use include: 165 * 166 * - struct pmap: describes the address space of one thread 167 * - struct pv_entry: describes one <PMAP,VA> mapping of a PA 168 * - pmap_remove_record: a list of virtual addresses whose mappings 169 * have been changed. used for TLB flushing. 170 */ 171 172 /* 173 * memory allocation 174 * 175 * - there are three data structures that we must dynamically allocate: 176 * 177 * [A] new process' page directory page (PDP) 178 * - plan 1: done at pmap_create() we use 179 * uvm_km_alloc(kernel_map, PAGE_SIZE) [fka kmem_alloc] to do this 180 * allocation. 181 * 182 * if we are low in free physical memory then we sleep in 183 * uvm_km_alloc -- in this case this is ok since we are creating 184 * a new pmap and should not be holding any locks. 185 * 186 * if the kernel is totally out of virtual space 187 * (i.e. uvm_km_alloc returns NULL), then we panic. 188 * 189 * XXX: the fork code currently has no way to return an "out of 190 * memory, try again" error code since uvm_fork [fka vm_fork] 191 * is a void function. 192 * 193 * [B] new page tables pages (PTP) 194 * call uvm_pagealloc() 195 * => success: zero page, add to pm_pdir 196 * => failure: we are out of free vm_pages, let pmap_enter() 197 * tell UVM about it. 198 * 199 * note: for kernel PTPs, we start with NKPTP of them. as we map 200 * kernel memory (at uvm_map time) we check to see if we've grown 201 * the kernel pmap. if so, we call the optional function 202 * pmap_growkernel() to grow the kernel PTPs in advance. 203 * 204 * [C] pv_entry structures 205 * - try to allocate one from the pool. 206 * If we fail, we simply let pmap_enter() tell UVM about it. 207 */ 208 209 vaddr_t ptp_masks[] = PTP_MASK_INITIALIZER; 210 int ptp_shifts[] = PTP_SHIFT_INITIALIZER; 211 long nkptp[] = NKPTP_INITIALIZER; 212 long nkptpmax[] = NKPTPMAX_INITIALIZER; 213 long nbpd[] = NBPD_INITIALIZER; 214 pd_entry_t *normal_pdes[] = PDES_INITIALIZER; 215 pd_entry_t *alternate_pdes[] = APDES_INITIALIZER; 216 217 /* int nkpde = NKPTP; */ 218 219 #define PMAP_MAP_TO_HEAD_LOCK() /* null */ 220 #define PMAP_MAP_TO_HEAD_UNLOCK() /* null */ 221 222 #define PMAP_HEAD_TO_MAP_LOCK() /* null */ 223 #define PMAP_HEAD_TO_MAP_UNLOCK() /* null */ 224 225 #define COUNT(x) /* nothing */ 226 227 /* 228 * global data structures 229 */ 230 231 struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */ 232 233 /* 234 * pmap_pg_g: if our processor supports PG_G in the PTE then we 235 * set pmap_pg_g to PG_G (otherwise it is zero). 236 */ 237 238 int pmap_pg_g = 0; 239 240 /* 241 * pmap_pg_wc: if our processor supports PAT then we set this 242 * to be the pte bits for Write Combining. Else we fall back to 243 * UC- so mtrrs can override the cacheability; 244 */ 245 int pmap_pg_wc = PG_UCMINUS; 246 247 /* 248 * other data structures 249 */ 250 251 pt_entry_t protection_codes[8]; /* maps MI prot to i386 prot code */ 252 boolean_t pmap_initialized = FALSE; /* pmap_init done yet? */ 253 254 /* 255 * pv management structures. 256 */ 257 struct pool pmap_pv_pool; 258 259 /* 260 * linked list of all non-kernel pmaps 261 */ 262 263 struct pmap_head pmaps; 264 265 /* 266 * pool that pmap structures are allocated from 267 */ 268 269 struct pool pmap_pmap_pool; 270 271 /* 272 * When we're freeing a ptp, we need to delay the freeing until all 273 * tlb shootdown has been done. This is the list of the to-be-freed pages. 274 */ 275 TAILQ_HEAD(pg_to_free, vm_page); 276 277 /* 278 * pool that PDPs are allocated from 279 */ 280 281 struct pool pmap_pdp_pool; 282 u_int pmap_pdp_cache_generation; 283 284 int pmap_pdp_ctor(void *, void *, int); 285 286 extern vaddr_t msgbuf_vaddr; 287 extern paddr_t msgbuf_paddr; 288 289 extern vaddr_t idt_vaddr; /* we allocate IDT early */ 290 extern paddr_t idt_paddr; 291 292 #ifdef _LP64 293 extern vaddr_t lo32_vaddr; 294 extern vaddr_t lo32_paddr; 295 #endif 296 297 vaddr_t virtual_avail; 298 extern int end; 299 300 /* 301 * local prototypes 302 */ 303 304 void pmap_enter_pv(struct vm_page *, struct pv_entry *, struct pmap *, 305 vaddr_t, struct vm_page *); 306 struct vm_page *pmap_get_ptp(struct pmap *, vaddr_t, pd_entry_t **); 307 struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, paddr_t, int); 308 void pmap_free_ptp(struct pmap *, struct vm_page *, 309 vaddr_t, pt_entry_t *, pd_entry_t **, struct pg_to_free *); 310 void pmap_freepage(struct pmap *, struct vm_page *, int, struct pg_to_free *); 311 static boolean_t pmap_is_active(struct pmap *, int); 312 void pmap_map_ptes(struct pmap *, pt_entry_t **, pd_entry_t ***); 313 struct pv_entry *pmap_remove_pv(struct vm_page *, struct pmap *, vaddr_t); 314 void pmap_do_remove(struct pmap *, vaddr_t, vaddr_t, int); 315 boolean_t pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *, 316 vaddr_t, int); 317 void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t, 318 vaddr_t, vaddr_t, int); 319 #define PMAP_REMOVE_ALL 0 /* remove all mappings */ 320 #define PMAP_REMOVE_SKIPWIRED 1 /* skip wired mappings */ 321 322 void pmap_unmap_ptes(struct pmap *); 323 boolean_t pmap_get_physpage(vaddr_t, int, paddr_t *); 324 boolean_t pmap_pdes_valid(vaddr_t, pd_entry_t **, pd_entry_t *); 325 void pmap_alloc_level(pd_entry_t **, vaddr_t, int, long *); 326 void pmap_apte_flush(struct pmap *pmap); 327 328 void pmap_sync_flags_pte(struct vm_page *, u_long); 329 330 /* 331 * p m a p i n l i n e h e l p e r f u n c t i o n s 332 */ 333 334 /* 335 * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]? 336 * of course the kernel is always loaded 337 */ 338 339 static __inline boolean_t 340 pmap_is_curpmap(struct pmap *pmap) 341 { 342 return((pmap == pmap_kernel()) || 343 (pmap->pm_pdirpa == (paddr_t) rcr3())); 344 } 345 346 /* 347 * pmap_is_active: is this pmap loaded into the specified processor's %cr3? 348 */ 349 350 static __inline boolean_t 351 pmap_is_active(struct pmap *pmap, int cpu_id) 352 { 353 return (pmap == pmap_kernel() || 354 (pmap->pm_cpus & (1ULL << cpu_id)) != 0); 355 } 356 357 static __inline u_int 358 pmap_pte2flags(u_long pte) 359 { 360 return (((pte & PG_U) ? PG_PMAP_REF : 0) | 361 ((pte & PG_M) ? PG_PMAP_MOD : 0)); 362 } 363 364 void 365 pmap_sync_flags_pte(struct vm_page *pg, u_long pte) 366 { 367 if (pte & (PG_U|PG_M)) { 368 atomic_setbits_int(&pg->pg_flags, pmap_pte2flags(pte)); 369 } 370 } 371 372 void 373 pmap_apte_flush(struct pmap *pmap) 374 { 375 pmap_tlb_shoottlb(); 376 pmap_tlb_shootwait(); 377 } 378 379 /* 380 * pmap_map_ptes: map a pmap's PTEs into KVM 381 * 382 * => we lock enough pmaps to keep things locked in 383 * => must be undone with pmap_unmap_ptes before returning 384 */ 385 386 void 387 pmap_map_ptes(struct pmap *pmap, pt_entry_t **ptepp, pd_entry_t ***pdeppp) 388 { 389 pd_entry_t opde, npde; 390 391 /* if curpmap then we are always mapped */ 392 if (pmap_is_curpmap(pmap)) { 393 *ptepp = PTE_BASE; 394 *pdeppp = normal_pdes; 395 return; 396 } 397 398 /* need to load a new alternate pt space into curpmap? */ 399 opde = *APDP_PDE; 400 if (!pmap_valid_entry(opde) || (opde & PG_FRAME) != pmap->pm_pdirpa) { 401 npde = (pd_entry_t) (pmap->pm_pdirpa | PG_RW | PG_V); 402 *APDP_PDE = npde; 403 if (pmap_valid_entry(opde)) 404 pmap_apte_flush(curpcb->pcb_pmap); 405 } 406 *ptepp = APTE_BASE; 407 *pdeppp = alternate_pdes; 408 } 409 410 void 411 pmap_unmap_ptes(struct pmap *pmap) 412 { 413 if (pmap_is_curpmap(pmap)) 414 return; 415 416 #if defined(MULTIPROCESSOR) 417 *APDP_PDE = 0; 418 pmap_apte_flush(curpcb->pcb_pmap); 419 #endif 420 COUNT(apdp_pde_unmap); 421 } 422 423 /* 424 * p m a p k e n t e r f u n c t i o n s 425 * 426 * functions to quickly enter/remove pages from the kernel address 427 * space. pmap_kremove is exported to MI kernel. we make use of 428 * the recursive PTE mappings. 429 */ 430 431 /* 432 * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking 433 * 434 * => no need to lock anything, assume va is already allocated 435 * => should be faster than normal pmap enter function 436 */ 437 438 void 439 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot) 440 { 441 pt_entry_t *pte, opte, npte; 442 443 pte = kvtopte(va); 444 445 npte = (pa & PMAP_PA_MASK) | ((prot & VM_PROT_WRITE) ? PG_RW : PG_RO) | 446 ((pa & PMAP_NOCACHE) ? PG_N : 0) | 447 ((pa & PMAP_WC) ? pmap_pg_wc : 0) | PG_V; 448 449 /* special 1:1 mappings in the first 2MB must not be global */ 450 if (va >= (vaddr_t)NBPD_L2) 451 npte |= pmap_pg_g; 452 453 if ((cpu_feature & CPUID_NXE) && !(prot & VM_PROT_EXECUTE)) 454 npte |= PG_NX; 455 opte = pmap_pte_set(pte, npte); 456 #ifdef LARGEPAGES 457 /* XXX For now... */ 458 if (opte & PG_PS) 459 panic("pmap_kenter_pa: PG_PS"); 460 #endif 461 if (pmap_valid_entry(opte)) { 462 if (pa & PMAP_NOCACHE && (opte & PG_N) == 0) 463 wbinvd(); 464 /* This shouldn't happen */ 465 pmap_tlb_shootpage(pmap_kernel(), va); 466 pmap_tlb_shootwait(); 467 } 468 } 469 470 /* 471 * pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking 472 * 473 * => no need to lock anything 474 * => caller must dispose of any vm_page mapped in the va range 475 * => note: not an inline function 476 * => we assume the va is page aligned and the len is a multiple of PAGE_SIZE 477 * => we assume kernel only unmaps valid addresses and thus don't bother 478 * checking the valid bit before doing TLB flushing 479 */ 480 481 void 482 pmap_kremove(vaddr_t sva, vsize_t len) 483 { 484 pt_entry_t *pte, opte; 485 vaddr_t va, eva; 486 487 eva = sva + len; 488 489 for (va = sva; va != eva; va += PAGE_SIZE) { 490 pte = kvtopte(va); 491 492 opte = pmap_pte_set(pte, 0); 493 #ifdef LARGEPAGES 494 KASSERT((opte & PG_PS) == 0); 495 #endif 496 KASSERT((opte & PG_PVLIST) == 0); 497 } 498 499 pmap_tlb_shootrange(pmap_kernel(), sva, eva); 500 pmap_tlb_shootwait(); 501 } 502 503 /* 504 * p m a p i n i t f u n c t i o n s 505 * 506 * pmap_bootstrap and pmap_init are called during system startup 507 * to init the pmap module. pmap_bootstrap() does a low level 508 * init just to get things rolling. pmap_init() finishes the job. 509 */ 510 511 /* 512 * pmap_bootstrap: get the system in a state where it can run with VM 513 * properly enabled (called before main()). the VM system is 514 * fully init'd later... 515 * 516 * => on i386, locore.s has already enabled the MMU by allocating 517 * a PDP for the kernel, and nkpde PTP's for the kernel. 518 * => kva_start is the first free virtual address in kernel space 519 */ 520 521 paddr_t 522 pmap_bootstrap(paddr_t first_avail, paddr_t max_pa) 523 { 524 vaddr_t kva, kva_end, kva_start = VM_MIN_KERNEL_ADDRESS; 525 struct pmap *kpm; 526 int i; 527 unsigned long p1i; 528 pt_entry_t pg_nx = (cpu_feature & CPUID_NXE? PG_NX : 0); 529 long ndmpdp; 530 paddr_t dmpd, dmpdp; 531 532 /* 533 * define the boundaries of the managed kernel virtual address 534 * space. 535 */ 536 537 virtual_avail = kva_start; /* first free KVA */ 538 539 /* 540 * set up protection_codes: we need to be able to convert from 541 * a MI protection code (some combo of VM_PROT...) to something 542 * we can jam into a i386 PTE. 543 */ 544 545 protection_codes[VM_PROT_NONE] = pg_nx; /* --- */ 546 protection_codes[VM_PROT_EXECUTE] = PG_RO; /* --x */ 547 protection_codes[VM_PROT_READ] = PG_RO | pg_nx; /* -r- */ 548 protection_codes[VM_PROT_READ|VM_PROT_EXECUTE] = PG_RO; /* -rx */ 549 protection_codes[VM_PROT_WRITE] = PG_RW | pg_nx; /* w-- */ 550 protection_codes[VM_PROT_WRITE|VM_PROT_EXECUTE] = PG_RW;/* w-x */ 551 protection_codes[VM_PROT_WRITE|VM_PROT_READ] = PG_RW | pg_nx; 552 /* wr- */ 553 protection_codes[VM_PROT_ALL] = PG_RW; /* wrx */ 554 555 /* 556 * now we init the kernel's pmap 557 * 558 * the kernel pmap's pm_obj is not used for much. however, in 559 * user pmaps the pm_obj contains the list of active PTPs. 560 * the pm_obj currently does not have a pager. it might be possible 561 * to add a pager that would allow a process to read-only mmap its 562 * own page tables (fast user level vtophys?). this may or may not 563 * be useful. 564 */ 565 566 kpm = pmap_kernel(); 567 for (i = 0; i < PTP_LEVELS - 1; i++) { 568 uvm_objinit(&kpm->pm_obj[i], NULL, 1); 569 kpm->pm_ptphint[i] = NULL; 570 } 571 memset(&kpm->pm_list, 0, sizeof(kpm->pm_list)); /* pm_list not used */ 572 kpm->pm_pdir = (pd_entry_t *)(proc0.p_addr->u_pcb.pcb_cr3 + KERNBASE); 573 kpm->pm_pdirpa = proc0.p_addr->u_pcb.pcb_cr3; 574 kpm->pm_stats.wired_count = kpm->pm_stats.resident_count = 575 atop(kva_start - VM_MIN_KERNEL_ADDRESS); 576 577 /* 578 * the above is just a rough estimate and not critical to the proper 579 * operation of the system. 580 */ 581 582 curpcb->pcb_pmap = kpm; /* proc0's pcb */ 583 584 /* 585 * enable global TLB entries. 586 */ 587 pmap_pg_g = PG_G; /* enable software */ 588 589 /* add PG_G attribute to already mapped kernel pages */ 590 #if KERNBASE == VM_MIN_KERNEL_ADDRESS 591 for (kva = VM_MIN_KERNEL_ADDRESS ; kva < virtual_avail ; 592 #else 593 kva_end = roundup((vaddr_t)&end, PAGE_SIZE); 594 for (kva = KERNBASE; kva < kva_end ; 595 #endif 596 kva += PAGE_SIZE) { 597 p1i = pl1_i(kva); 598 if (pmap_valid_entry(PTE_BASE[p1i])) 599 PTE_BASE[p1i] |= PG_G; 600 } 601 602 /* 603 * Map the direct map. The first 4GB were mapped in locore, here 604 * we map the rest if it exists. We actually use the direct map 605 * here to set up the page tables, we're assuming that we're still 606 * operating in the lower 4GB of memory. 607 */ 608 ndmpdp = (max_pa + NBPD_L3 - 1) >> L3_SHIFT; 609 if (ndmpdp < NDML2_ENTRIES) 610 ndmpdp = NDML2_ENTRIES; /* At least 4GB */ 611 612 dmpdp = kpm->pm_pdir[PDIR_SLOT_DIRECT] & PG_FRAME; 613 614 dmpd = first_avail; first_avail += ndmpdp * PAGE_SIZE; 615 616 for (i = NDML2_ENTRIES; i < NPDPG * ndmpdp; i++) { 617 paddr_t pdp; 618 vaddr_t va; 619 620 pdp = (paddr_t)&(((pd_entry_t *)dmpd)[i]); 621 va = PMAP_DIRECT_MAP(pdp); 622 623 *((pd_entry_t *)va) = ((paddr_t)i << L2_SHIFT); 624 *((pd_entry_t *)va) |= PG_RW | PG_V | PG_PS | PG_G | PG_U | 625 PG_M; 626 } 627 628 for (i = NDML2_ENTRIES; i < ndmpdp; i++) { 629 paddr_t pdp; 630 vaddr_t va; 631 632 pdp = (paddr_t)&(((pd_entry_t *)dmpdp)[i]); 633 va = PMAP_DIRECT_MAP(pdp); 634 635 *((pd_entry_t *)va) = dmpd + (i << PAGE_SHIFT); 636 *((pd_entry_t *)va) |= PG_RW | PG_V | PG_U | PG_M; 637 } 638 639 kpm->pm_pdir[PDIR_SLOT_DIRECT] = dmpdp | PG_V | PG_KW | PG_U | 640 PG_M; 641 642 tlbflush(); 643 644 msgbuf_vaddr = virtual_avail; 645 virtual_avail += round_page(MSGBUFSIZE); 646 647 idt_vaddr = virtual_avail; 648 virtual_avail += 2 * PAGE_SIZE; 649 idt_paddr = first_avail; /* steal a page */ 650 first_avail += 2 * PAGE_SIZE; 651 652 #ifdef _LP64 653 /* 654 * Grab a page below 4G for things that need it (i.e. 655 * having an initial %cr3 for the MP trampoline). 656 */ 657 lo32_vaddr = virtual_avail; 658 virtual_avail += PAGE_SIZE; 659 lo32_paddr = first_avail; 660 first_avail += PAGE_SIZE; 661 #endif 662 /* 663 * init the global lists. 664 */ 665 LIST_INIT(&pmaps); 666 667 /* 668 * initialize the pmap pool. 669 */ 670 671 pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl", 672 &pool_allocator_nointr); 673 pool_init(&pmap_pv_pool, sizeof(struct pv_entry), 0, 0, 0, "pvpl", 674 &pool_allocator_nointr); 675 pool_sethiwat(&pmap_pv_pool, 32 * 1024); 676 677 /* 678 * initialize the PDE pool. 679 */ 680 681 pool_init(&pmap_pdp_pool, PAGE_SIZE, 0, 0, 0, "pdppl", 682 &pool_allocator_nointr); 683 pool_set_ctordtor(&pmap_pdp_pool, pmap_pdp_ctor, NULL, NULL); 684 685 686 /* 687 * ensure the TLB is sync'd with reality by flushing it... 688 */ 689 690 tlbflush(); 691 692 return first_avail; 693 } 694 695 /* 696 * Pre-allocate PTPs for low memory, so that 1:1 mappings for various 697 * trampoline code can be entered. 698 */ 699 paddr_t 700 pmap_prealloc_lowmem_ptps(paddr_t first_avail) 701 { 702 pd_entry_t *pdes; 703 int level; 704 paddr_t newp; 705 706 pdes = pmap_kernel()->pm_pdir; 707 level = PTP_LEVELS; 708 for (;;) { 709 newp = first_avail; first_avail += PAGE_SIZE; 710 memset((void *)PMAP_DIRECT_MAP(newp), 0, PAGE_SIZE); 711 pdes[pl_i(0, level)] = (newp & PG_FRAME) | PG_V | PG_RW; 712 level--; 713 if (level <= 1) 714 break; 715 pdes = normal_pdes[level - 2]; 716 } 717 718 return first_avail; 719 } 720 721 /* 722 * pmap_init: called from uvm_init, our job is to get the pmap 723 * system ready to manage mappings... this mainly means initing 724 * the pv_entry stuff. 725 */ 726 727 void 728 pmap_init(void) 729 { 730 /* 731 * done: pmap module is up (and ready for business) 732 */ 733 734 pmap_initialized = TRUE; 735 } 736 737 /* 738 * p v _ e n t r y f u n c t i o n s 739 */ 740 741 /* 742 * main pv_entry manipulation functions: 743 * pmap_enter_pv: enter a mapping onto a pv list 744 * pmap_remove_pv: remove a mapping from a pv list 745 */ 746 747 /* 748 * pmap_enter_pv: enter a mapping onto a pv list 749 * 750 * => caller should adjust ptp's wire_count before calling 751 * 752 * pve: preallocated pve for us to use 753 * ptp: PTP in pmap that maps this VA 754 */ 755 756 void 757 pmap_enter_pv(struct vm_page *pg, struct pv_entry *pve, struct pmap *pmap, 758 vaddr_t va, struct vm_page *ptp) 759 { 760 pve->pv_pmap = pmap; 761 pve->pv_va = va; 762 pve->pv_ptp = ptp; /* NULL for kernel pmap */ 763 pve->pv_next = pg->mdpage.pv_list; /* add to ... */ 764 pg->mdpage.pv_list = pve; /* ... list */ 765 } 766 767 /* 768 * pmap_remove_pv: try to remove a mapping from a pv_list 769 * 770 * => caller should adjust ptp's wire_count and free PTP if needed 771 * => we return the removed pve 772 */ 773 774 struct pv_entry * 775 pmap_remove_pv(struct vm_page *pg, struct pmap *pmap, vaddr_t va) 776 { 777 struct pv_entry *pve, **prevptr; 778 779 prevptr = &pg->mdpage.pv_list; 780 while ((pve = *prevptr) != NULL) { 781 if (pve->pv_pmap == pmap && pve->pv_va == va) { /* match? */ 782 *prevptr = pve->pv_next; /* remove it! */ 783 break; 784 } 785 prevptr = &pve->pv_next; /* previous pointer */ 786 } 787 return(pve); /* return removed pve */ 788 } 789 790 /* 791 * p t p f u n c t i o n s 792 */ 793 794 struct vm_page * 795 pmap_find_ptp(struct pmap *pmap, vaddr_t va, paddr_t pa, int level) 796 { 797 int lidx = level - 1; 798 struct vm_page *pg; 799 800 if (pa != (paddr_t)-1 && pmap->pm_ptphint[lidx] && 801 pa == VM_PAGE_TO_PHYS(pmap->pm_ptphint[lidx])) { 802 return (pmap->pm_ptphint[lidx]); 803 } 804 if (lidx == 0) 805 pg = uvm_pagelookup(&pmap->pm_obj[lidx], ptp_va2o(va, level)); 806 else { 807 pg = uvm_pagelookup(&pmap->pm_obj[lidx], ptp_va2o(va, level)); 808 } 809 return pg; 810 } 811 812 void 813 pmap_freepage(struct pmap *pmap, struct vm_page *ptp, int level, 814 struct pg_to_free *pagelist) 815 { 816 int lidx; 817 struct uvm_object *obj; 818 819 lidx = level - 1; 820 821 obj = &pmap->pm_obj[lidx]; 822 pmap->pm_stats.resident_count--; 823 if (pmap->pm_ptphint[lidx] == ptp) 824 pmap->pm_ptphint[lidx] = RB_ROOT(&obj->memt); 825 ptp->wire_count = 0; 826 uvm_pagerealloc(ptp, NULL, 0); 827 TAILQ_INSERT_TAIL(pagelist, ptp, pageq); 828 } 829 830 void 831 pmap_free_ptp(struct pmap *pmap, struct vm_page *ptp, vaddr_t va, 832 pt_entry_t *ptes, pd_entry_t **pdes, struct pg_to_free *pagelist) 833 { 834 unsigned long index; 835 int level; 836 vaddr_t invaladdr; 837 pd_entry_t opde; 838 839 level = 1; 840 do { 841 pmap_freepage(pmap, ptp, level, pagelist); 842 index = pl_i(va, level + 1); 843 opde = pmap_pte_set(&pdes[level - 1][index], 0); 844 invaladdr = level == 1 ? (vaddr_t)ptes : 845 (vaddr_t)pdes[level - 2]; 846 pmap_tlb_shootpage(curpcb->pcb_pmap, 847 invaladdr + index * PAGE_SIZE); 848 #if defined(MULTIPROCESSOR) 849 invaladdr = level == 1 ? (vaddr_t)PTE_BASE : 850 (vaddr_t)normal_pdes[level - 2]; 851 pmap_tlb_shootpage(pmap, invaladdr + index * PAGE_SIZE); 852 #endif 853 if (level < PTP_LEVELS - 1) { 854 ptp = pmap_find_ptp(pmap, va, (paddr_t)-1, level + 1); 855 ptp->wire_count--; 856 if (ptp->wire_count > 1) 857 break; 858 } 859 } while (++level < PTP_LEVELS); 860 } 861 862 /* 863 * pmap_get_ptp: get a PTP (if there isn't one, allocate a new one) 864 * 865 * => pmap should NOT be pmap_kernel() 866 */ 867 868 869 struct vm_page * 870 pmap_get_ptp(struct pmap *pmap, vaddr_t va, pd_entry_t **pdes) 871 { 872 struct vm_page *ptp, *pptp; 873 int i; 874 unsigned long index; 875 pd_entry_t *pva; 876 paddr_t ppa, pa; 877 struct uvm_object *obj; 878 879 ptp = NULL; 880 pa = (paddr_t)-1; 881 882 /* 883 * Loop through all page table levels seeing if we need to 884 * add a new page to that level. 885 */ 886 for (i = PTP_LEVELS; i > 1; i--) { 887 /* 888 * Save values from previous round. 889 */ 890 pptp = ptp; 891 ppa = pa; 892 893 index = pl_i(va, i); 894 pva = pdes[i - 2]; 895 896 if (pmap_valid_entry(pva[index])) { 897 ppa = pva[index] & PG_FRAME; 898 ptp = NULL; 899 continue; 900 } 901 902 obj = &pmap->pm_obj[i-2]; 903 ptp = uvm_pagealloc(obj, ptp_va2o(va, i - 1), NULL, 904 UVM_PGA_USERESERVE|UVM_PGA_ZERO); 905 906 if (ptp == NULL) 907 return NULL; 908 909 atomic_clearbits_int(&ptp->pg_flags, PG_BUSY); 910 ptp->wire_count = 1; 911 pmap->pm_ptphint[i - 2] = ptp; 912 pa = VM_PAGE_TO_PHYS(ptp); 913 pva[index] = (pd_entry_t) (pa | PG_u | PG_RW | PG_V); 914 pmap->pm_stats.resident_count++; 915 /* 916 * If we're not in the top level, increase the 917 * wire count of the parent page. 918 */ 919 if (i < PTP_LEVELS) { 920 if (pptp == NULL) 921 pptp = pmap_find_ptp(pmap, va, ppa, i); 922 #ifdef DIAGNOSTIC 923 if (pptp == NULL) 924 panic("pde page disappeared"); 925 #endif 926 pptp->wire_count++; 927 } 928 } 929 930 /* 931 * ptp is not NULL if we just allocated a new ptp. If it's 932 * still NULL, we must look up the existing one. 933 */ 934 if (ptp == NULL) { 935 ptp = pmap_find_ptp(pmap, va, ppa, 1); 936 #ifdef DIAGNOSTIC 937 if (ptp == NULL) { 938 printf("va %lx ppa %lx\n", (unsigned long)va, 939 (unsigned long)ppa); 940 panic("pmap_get_ptp: unmanaged user PTP"); 941 } 942 #endif 943 } 944 945 pmap->pm_ptphint[0] = ptp; 946 return(ptp); 947 } 948 949 /* 950 * p m a p l i f e c y c l e f u n c t i o n s 951 */ 952 953 /* 954 * pmap_pdp_ctor: constructor for the PDP cache. 955 */ 956 957 int 958 pmap_pdp_ctor(void *arg, void *object, int flags) 959 { 960 pd_entry_t *pdir = object; 961 paddr_t pdirpa; 962 int npde; 963 964 /* fetch the physical address of the page directory. */ 965 (void) pmap_extract(pmap_kernel(), (vaddr_t) pdir, &pdirpa); 966 967 /* zero init area */ 968 memset(pdir, 0, PDIR_SLOT_PTE * sizeof(pd_entry_t)); 969 970 /* put in recursive PDE to map the PTEs */ 971 pdir[PDIR_SLOT_PTE] = pdirpa | PG_V | PG_KW; 972 973 npde = nkptp[PTP_LEVELS - 1]; 974 975 /* put in kernel VM PDEs */ 976 memcpy(&pdir[PDIR_SLOT_KERN], &PDP_BASE[PDIR_SLOT_KERN], 977 npde * sizeof(pd_entry_t)); 978 979 /* zero the rest */ 980 memset(&pdir[PDIR_SLOT_KERN + npde], 0, 981 (NTOPLEVEL_PDES - (PDIR_SLOT_KERN + npde)) * sizeof(pd_entry_t)); 982 983 pdir[PDIR_SLOT_DIRECT] = pmap_kernel()->pm_pdir[PDIR_SLOT_DIRECT]; 984 985 #if VM_MIN_KERNEL_ADDRESS != KERNBASE 986 pdir[pl4_pi(KERNBASE)] = PDP_BASE[pl4_pi(KERNBASE)]; 987 #endif 988 989 return (0); 990 } 991 992 /* 993 * pmap_create: create a pmap 994 * 995 * => note: old pmap interface took a "size" args which allowed for 996 * the creation of "software only" pmaps (not in bsd). 997 */ 998 999 struct pmap * 1000 pmap_create(void) 1001 { 1002 struct pmap *pmap; 1003 int i; 1004 u_int gen; 1005 1006 pmap = pool_get(&pmap_pmap_pool, PR_WAITOK); 1007 1008 /* init uvm_object */ 1009 for (i = 0; i < PTP_LEVELS - 1; i++) { 1010 uvm_objinit(&pmap->pm_obj[i], NULL, 1); 1011 pmap->pm_ptphint[i] = NULL; 1012 } 1013 pmap->pm_stats.wired_count = 0; 1014 pmap->pm_stats.resident_count = 1; /* count the PDP allocd below */ 1015 pmap->pm_cpus = 0; 1016 1017 /* allocate PDP */ 1018 1019 /* 1020 * note that there is no need to splvm to protect us from 1021 * malloc since malloc allocates out of a submap and we should 1022 * have already allocated kernel PTPs to cover the range... 1023 */ 1024 1025 try_again: 1026 gen = pmap_pdp_cache_generation; 1027 pmap->pm_pdir = pool_get(&pmap_pdp_pool, PR_WAITOK); 1028 1029 if (gen != pmap_pdp_cache_generation) { 1030 pool_put(&pmap_pdp_pool, pmap->pm_pdir); 1031 goto try_again; 1032 } 1033 1034 pmap->pm_pdirpa = pmap->pm_pdir[PDIR_SLOT_PTE] & PG_FRAME; 1035 1036 LIST_INSERT_HEAD(&pmaps, pmap, pm_list); 1037 return (pmap); 1038 } 1039 1040 /* 1041 * pmap_destroy: drop reference count on pmap. free pmap if 1042 * reference count goes to zero. 1043 */ 1044 1045 void 1046 pmap_destroy(struct pmap *pmap) 1047 { 1048 struct vm_page *pg; 1049 int refs; 1050 int i; 1051 1052 /* 1053 * drop reference count 1054 */ 1055 1056 refs = --pmap->pm_obj[0].uo_refs; 1057 if (refs > 0) { 1058 return; 1059 } 1060 1061 /* 1062 * reference count is zero, free pmap resources and then free pmap. 1063 */ 1064 1065 #ifdef DIAGNOSTIC 1066 if (pmap->pm_cpus != 0) 1067 printf("pmap_destroy: pmap %p cpus=0x%lx\n", 1068 (void *)pmap, pmap->pm_cpus); 1069 #endif 1070 1071 /* 1072 * remove it from global list of pmaps 1073 */ 1074 LIST_REMOVE(pmap, pm_list); 1075 1076 /* 1077 * free any remaining PTPs 1078 */ 1079 1080 for (i = 0; i < PTP_LEVELS - 1; i++) { 1081 while ((pg = RB_ROOT(&pmap->pm_obj[i].memt)) != NULL) { 1082 KASSERT((pg->pg_flags & PG_BUSY) == 0); 1083 1084 pg->wire_count = 0; 1085 uvm_pagefree(pg); 1086 } 1087 } 1088 1089 /* 1090 * MULTIPROCESSOR -- no need to flush out of other processors' 1091 * APTE space because we do that in pmap_unmap_ptes(). 1092 */ 1093 /* XXX: need to flush it out of other processor's APTE space? */ 1094 pool_put(&pmap_pdp_pool, pmap->pm_pdir); 1095 1096 pool_put(&pmap_pmap_pool, pmap); 1097 } 1098 1099 /* 1100 * Add a reference to the specified pmap. 1101 */ 1102 1103 void 1104 pmap_reference(struct pmap *pmap) 1105 { 1106 pmap->pm_obj[0].uo_refs++; 1107 } 1108 1109 /* 1110 * pmap_activate: activate a process' pmap (fill in %cr3) 1111 * 1112 * => called from cpu_fork() and when switching pmaps during exec 1113 * => if p is the curproc, then load it into the MMU 1114 */ 1115 1116 void 1117 pmap_activate(struct proc *p) 1118 { 1119 struct pcb *pcb = &p->p_addr->u_pcb; 1120 struct pmap *pmap = p->p_vmspace->vm_map.pmap; 1121 1122 pcb->pcb_pmap = pmap; 1123 pcb->pcb_cr3 = pmap->pm_pdirpa; 1124 if (p == curproc) { 1125 lcr3(pcb->pcb_cr3); 1126 1127 /* 1128 * mark the pmap in use by this processor. 1129 */ 1130 x86_atomic_setbits_u64(&pmap->pm_cpus, (1ULL << cpu_number())); 1131 } 1132 } 1133 1134 /* 1135 * pmap_deactivate: deactivate a process' pmap 1136 */ 1137 1138 void 1139 pmap_deactivate(struct proc *p) 1140 { 1141 struct pmap *pmap = p->p_vmspace->vm_map.pmap; 1142 1143 /* 1144 * mark the pmap no longer in use by this processor. 1145 */ 1146 x86_atomic_clearbits_u64(&pmap->pm_cpus, (1ULL << cpu_number())); 1147 } 1148 1149 /* 1150 * end of lifecycle functions 1151 */ 1152 1153 /* 1154 * some misc. functions 1155 */ 1156 1157 boolean_t 1158 pmap_pdes_valid(vaddr_t va, pd_entry_t **pdes, pd_entry_t *lastpde) 1159 { 1160 int i; 1161 unsigned long index; 1162 pd_entry_t pde; 1163 1164 for (i = PTP_LEVELS; i > 1; i--) { 1165 index = pl_i(va, i); 1166 pde = pdes[i - 2][index]; 1167 if ((pde & PG_V) == 0) 1168 return FALSE; 1169 } 1170 if (lastpde != NULL) 1171 *lastpde = pde; 1172 return TRUE; 1173 } 1174 1175 /* 1176 * pmap_extract: extract a PA for the given VA 1177 */ 1178 1179 boolean_t 1180 pmap_extract(struct pmap *pmap, vaddr_t va, paddr_t *pap) 1181 { 1182 pt_entry_t *ptes, pte; 1183 pd_entry_t pde, **pdes; 1184 1185 if (pmap == pmap_kernel() && va >= PMAP_DIRECT_BASE && 1186 va < PMAP_DIRECT_END) { 1187 *pap = va - PMAP_DIRECT_BASE; 1188 return (TRUE); 1189 } 1190 1191 pmap_map_ptes(pmap, &ptes, &pdes); 1192 if (pmap_pdes_valid(va, pdes, &pde) == FALSE) { 1193 return FALSE; 1194 } 1195 1196 if (pde & PG_PS) { 1197 if (pap != NULL) 1198 *pap = (pde & PG_LGFRAME) | (va & 0x1fffff); 1199 pmap_unmap_ptes(pmap); 1200 return (TRUE); 1201 } 1202 1203 pte = ptes[pl1_i(va)]; 1204 pmap_unmap_ptes(pmap); 1205 1206 if (__predict_true((pte & PG_V) != 0)) { 1207 if (pap != NULL) 1208 *pap = (pte & PG_FRAME) | (va & 0xfff); 1209 return (TRUE); 1210 } 1211 1212 return FALSE; 1213 } 1214 1215 /* 1216 * pmap_zero_page: zero a page 1217 */ 1218 1219 void 1220 pmap_zero_page(struct vm_page *pg) 1221 { 1222 pagezero(pmap_map_direct(pg)); 1223 } 1224 1225 /* 1226 * pmap_flush_cache: flush the cache for a virtual address. 1227 */ 1228 void 1229 pmap_flush_cache(vaddr_t addr, vsize_t len) 1230 { 1231 vaddr_t i; 1232 1233 if (curcpu()->ci_cflushsz == 0) { 1234 wbinvd(); 1235 return; 1236 } 1237 1238 /* all cpus that have clflush also have mfence. */ 1239 mfence(); 1240 for (i = addr; i < addr + len; i += curcpu()->ci_cflushsz) 1241 clflush(i); 1242 mfence(); 1243 } 1244 1245 /* 1246 * pmap_pagezeroidle: the same, for the idle loop page zero'er. 1247 * Returns TRUE if the page was zero'd, FALSE if we aborted for 1248 * some reason. 1249 */ 1250 1251 boolean_t 1252 pmap_pageidlezero(struct vm_page *pg) 1253 { 1254 vaddr_t va = pmap_map_direct(pg); 1255 boolean_t rv = TRUE; 1256 long *ptr; 1257 int i; 1258 1259 /* 1260 * XXX - We'd really like to do this uncached. But at this moment 1261 * we're never called, so just pretend that this works. 1262 * It shouldn't be too hard to create a second direct map 1263 * with uncached mappings. 1264 */ 1265 for (i = 0, ptr = (long *) va; i < PAGE_SIZE / sizeof(long); i++) { 1266 if (!curcpu_is_idle()) { 1267 1268 /* 1269 * A process has become ready. Abort now, 1270 * so we don't keep it waiting while we 1271 * do slow memory access to finish this 1272 * page. 1273 */ 1274 1275 rv = FALSE; 1276 break; 1277 } 1278 *ptr++ = 0; 1279 } 1280 1281 return (rv); 1282 } 1283 1284 /* 1285 * pmap_copy_page: copy a page 1286 */ 1287 1288 void 1289 pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg) 1290 { 1291 vaddr_t srcva = pmap_map_direct(srcpg); 1292 vaddr_t dstva = pmap_map_direct(dstpg); 1293 1294 memcpy((void *)dstva, (void *)srcva, PAGE_SIZE); 1295 } 1296 1297 /* 1298 * p m a p r e m o v e f u n c t i o n s 1299 * 1300 * functions that remove mappings 1301 */ 1302 1303 /* 1304 * pmap_remove_ptes: remove PTEs from a PTP 1305 * 1306 * => must have proper locking on pmap_master_lock 1307 * => PTP must be mapped into KVA 1308 * => PTP should be null if pmap == pmap_kernel() 1309 */ 1310 1311 void 1312 pmap_remove_ptes(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva, 1313 vaddr_t startva, vaddr_t endva, int flags) 1314 { 1315 struct pv_entry *pve; 1316 pt_entry_t *pte = (pt_entry_t *) ptpva; 1317 struct vm_page *pg; 1318 pt_entry_t opte; 1319 1320 /* 1321 * note that ptpva points to the PTE that maps startva. this may 1322 * or may not be the first PTE in the PTP. 1323 * 1324 * we loop through the PTP while there are still PTEs to look at 1325 * and the wire_count is greater than 1 (because we use the wire_count 1326 * to keep track of the number of real PTEs in the PTP). 1327 */ 1328 1329 for (/*null*/; startva < endva && (ptp == NULL || ptp->wire_count > 1) 1330 ; pte++, startva += PAGE_SIZE) { 1331 if (!pmap_valid_entry(*pte)) 1332 continue; /* VA not mapped */ 1333 if ((flags & PMAP_REMOVE_SKIPWIRED) && (*pte & PG_W)) { 1334 continue; 1335 } 1336 1337 /* atomically save the old PTE and zap! it */ 1338 opte = pmap_pte_set(pte, 0); 1339 1340 if (opte & PG_W) 1341 pmap->pm_stats.wired_count--; 1342 pmap->pm_stats.resident_count--; 1343 1344 if (ptp) 1345 ptp->wire_count--; /* dropping a PTE */ 1346 1347 pg = PHYS_TO_VM_PAGE(opte & PG_FRAME); 1348 1349 /* 1350 * if we are not on a pv list we are done. 1351 */ 1352 1353 if ((opte & PG_PVLIST) == 0) { 1354 #ifdef DIAGNOSTIC 1355 if (pg != NULL) 1356 panic("pmap_remove_ptes: managed page without " 1357 "PG_PVLIST for 0x%lx", startva); 1358 #endif 1359 continue; 1360 } 1361 1362 #ifdef DIAGNOSTIC 1363 if (pg == NULL) 1364 panic("pmap_remove_ptes: unmanaged page marked " 1365 "PG_PVLIST, va = 0x%lx, pa = 0x%lx", 1366 startva, (u_long)(opte & PG_FRAME)); 1367 #endif 1368 1369 /* sync R/M bits */ 1370 pmap_sync_flags_pte(pg, opte); 1371 pve = pmap_remove_pv(pg, pmap, startva); 1372 1373 if (pve) { 1374 pool_put(&pmap_pv_pool, pve); 1375 } 1376 1377 /* end of "for" loop: time for next pte */ 1378 } 1379 } 1380 1381 1382 /* 1383 * pmap_remove_pte: remove a single PTE from a PTP 1384 * 1385 * => must have proper locking on pmap_master_lock 1386 * => PTP must be mapped into KVA 1387 * => PTP should be null if pmap == pmap_kernel() 1388 * => returns true if we removed a mapping 1389 */ 1390 1391 boolean_t 1392 pmap_remove_pte(struct pmap *pmap, struct vm_page *ptp, pt_entry_t *pte, 1393 vaddr_t va, int flags) 1394 { 1395 struct pv_entry *pve; 1396 struct vm_page *pg; 1397 pt_entry_t opte; 1398 1399 if (!pmap_valid_entry(*pte)) 1400 return(FALSE); /* VA not mapped */ 1401 if ((flags & PMAP_REMOVE_SKIPWIRED) && (*pte & PG_W)) { 1402 return(FALSE); 1403 } 1404 1405 /* atomically save the old PTE and zap! it */ 1406 opte = pmap_pte_set(pte, 0); 1407 1408 if (opte & PG_W) 1409 pmap->pm_stats.wired_count--; 1410 pmap->pm_stats.resident_count--; 1411 1412 if (ptp) 1413 ptp->wire_count--; /* dropping a PTE */ 1414 1415 pg = PHYS_TO_VM_PAGE(opte & PG_FRAME); 1416 1417 /* 1418 * if we are not on a pv list we are done. 1419 */ 1420 if ((opte & PG_PVLIST) == 0) { 1421 #ifdef DIAGNOSTIC 1422 if (pg != NULL) 1423 panic("pmap_remove_pte: managed page without " 1424 "PG_PVLIST for 0x%lx", va); 1425 #endif 1426 return(TRUE); 1427 } 1428 1429 #ifdef DIAGNOSTIC 1430 if (pg == NULL) 1431 panic("pmap_remove_pte: unmanaged page marked " 1432 "PG_PVLIST, va = 0x%lx, pa = 0x%lx", va, 1433 (u_long)(opte & PG_FRAME)); 1434 #endif 1435 1436 /* sync R/M bits */ 1437 pmap_sync_flags_pte(pg, opte); 1438 pve = pmap_remove_pv(pg, pmap, va); 1439 if (pve) 1440 pool_put(&pmap_pv_pool, pve); 1441 return(TRUE); 1442 } 1443 1444 /* 1445 * pmap_remove: top level mapping removal function 1446 * 1447 * => caller should not be holding any pmap locks 1448 */ 1449 1450 void 1451 pmap_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva) 1452 { 1453 pmap_do_remove(pmap, sva, eva, PMAP_REMOVE_ALL); 1454 } 1455 1456 /* 1457 * pmap_do_remove: mapping removal guts 1458 * 1459 * => caller should not be holding any pmap locks 1460 */ 1461 1462 void 1463 pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags) 1464 { 1465 pt_entry_t *ptes; 1466 pd_entry_t **pdes, pde; 1467 boolean_t result; 1468 paddr_t ptppa; 1469 vaddr_t blkendva; 1470 struct vm_page *ptp; 1471 vaddr_t va; 1472 int shootall = 0; 1473 struct pg_to_free empty_ptps; 1474 1475 TAILQ_INIT(&empty_ptps); 1476 1477 PMAP_MAP_TO_HEAD_LOCK(); 1478 pmap_map_ptes(pmap, &ptes, &pdes); 1479 1480 /* 1481 * removing one page? take shortcut function. 1482 */ 1483 1484 if (sva + PAGE_SIZE == eva) { 1485 if (pmap_pdes_valid(sva, pdes, &pde)) { 1486 1487 /* PA of the PTP */ 1488 ptppa = pde & PG_FRAME; 1489 1490 /* get PTP if non-kernel mapping */ 1491 1492 if (pmap == pmap_kernel()) { 1493 /* we never free kernel PTPs */ 1494 ptp = NULL; 1495 } else { 1496 ptp = pmap_find_ptp(pmap, sva, ptppa, 1); 1497 #ifdef DIAGNOSTIC 1498 if (ptp == NULL) 1499 panic("pmap_remove: unmanaged " 1500 "PTP detected"); 1501 #endif 1502 } 1503 1504 /* do it! */ 1505 result = pmap_remove_pte(pmap, ptp, 1506 &ptes[pl1_i(sva)], sva, flags); 1507 1508 /* 1509 * if mapping removed and the PTP is no longer 1510 * being used, free it! 1511 */ 1512 1513 if (result && ptp && ptp->wire_count <= 1) 1514 pmap_free_ptp(pmap, ptp, sva, ptes, pdes, 1515 &empty_ptps); 1516 pmap_tlb_shootpage(pmap, sva); 1517 } 1518 1519 pmap_tlb_shootwait(); 1520 pmap_unmap_ptes(pmap); 1521 PMAP_MAP_TO_HEAD_UNLOCK(); 1522 1523 while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) { 1524 TAILQ_REMOVE(&empty_ptps, ptp, pageq); 1525 uvm_pagefree(ptp); 1526 } 1527 1528 return; 1529 } 1530 1531 if ((eva - sva > 32 * PAGE_SIZE) && pmap != pmap_kernel()) 1532 shootall = 1; 1533 1534 for (va = sva; va < eva; va = blkendva) { 1535 /* determine range of block */ 1536 blkendva = x86_round_pdr(va + 1); 1537 if (blkendva > eva) 1538 blkendva = eva; 1539 1540 /* 1541 * XXXCDC: our PTE mappings should never be removed 1542 * with pmap_remove! if we allow this (and why would 1543 * we?) then we end up freeing the pmap's page 1544 * directory page (PDP) before we are finished using 1545 * it when we hit in in the recursive mapping. this 1546 * is BAD. 1547 * 1548 * long term solution is to move the PTEs out of user 1549 * address space. and into kernel address space (up 1550 * with APTE). then we can set VM_MAXUSER_ADDRESS to 1551 * be VM_MAX_ADDRESS. 1552 */ 1553 1554 if (pl_i(va, PTP_LEVELS) == PDIR_SLOT_PTE) 1555 /* XXXCDC: ugly hack to avoid freeing PDP here */ 1556 continue; 1557 1558 if (!pmap_pdes_valid(va, pdes, &pde)) 1559 continue; 1560 1561 /* PA of the PTP */ 1562 ptppa = pde & PG_FRAME; 1563 1564 /* get PTP if non-kernel mapping */ 1565 if (pmap == pmap_kernel()) { 1566 /* we never free kernel PTPs */ 1567 ptp = NULL; 1568 } else { 1569 ptp = pmap_find_ptp(pmap, va, ptppa, 1); 1570 #ifdef DIAGNOSTIC 1571 if (ptp == NULL) 1572 panic("pmap_remove: unmanaged PTP " 1573 "detected"); 1574 #endif 1575 } 1576 pmap_remove_ptes(pmap, ptp, 1577 (vaddr_t)&ptes[pl1_i(va)], va, blkendva, flags); 1578 1579 /* if PTP is no longer being used, free it! */ 1580 if (ptp && ptp->wire_count <= 1) { 1581 pmap_free_ptp(pmap, ptp, va, ptes, pdes, &empty_ptps); 1582 } 1583 } 1584 1585 if (shootall) 1586 pmap_tlb_shoottlb(); 1587 else 1588 pmap_tlb_shootrange(pmap, sva, eva); 1589 1590 pmap_tlb_shootwait(); 1591 1592 pmap_unmap_ptes(pmap); 1593 PMAP_MAP_TO_HEAD_UNLOCK(); 1594 1595 while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) { 1596 TAILQ_REMOVE(&empty_ptps, ptp, pageq); 1597 uvm_pagefree(ptp); 1598 } 1599 } 1600 1601 /* 1602 * pmap_page_remove: remove a managed vm_page from all pmaps that map it 1603 * 1604 * => R/M bits are sync'd back to attrs 1605 */ 1606 1607 void 1608 pmap_page_remove(struct vm_page *pg) 1609 { 1610 struct pv_entry *pve; 1611 pt_entry_t *ptes, opte; 1612 pd_entry_t **pdes; 1613 #ifdef DIAGNOSTIC 1614 pd_entry_t pde; 1615 #endif 1616 struct pg_to_free empty_ptps; 1617 struct vm_page *ptp; 1618 1619 TAILQ_INIT(&empty_ptps); 1620 1621 PMAP_HEAD_TO_MAP_LOCK(); 1622 1623 while ((pve = pg->mdpage.pv_list) != NULL) { 1624 pg->mdpage.pv_list = pve->pv_next; 1625 1626 pmap_map_ptes(pve->pv_pmap, &ptes, &pdes); 1627 1628 #ifdef DIAGNOSTIC 1629 if (pve->pv_ptp && pmap_pdes_valid(pve->pv_va, pdes, &pde) && 1630 (pde & PG_FRAME) != VM_PAGE_TO_PHYS(pve->pv_ptp)) { 1631 printf("pmap_page_remove: pg=%p: va=%lx, pv_ptp=%p\n", 1632 pg, pve->pv_va, pve->pv_ptp); 1633 printf("pmap_page_remove: PTP's phys addr: " 1634 "actual=%lx, recorded=%lx\n", 1635 (unsigned long)(pde & PG_FRAME), 1636 VM_PAGE_TO_PHYS(pve->pv_ptp)); 1637 panic("pmap_page_remove: mapped managed page has " 1638 "invalid pv_ptp field"); 1639 } 1640 #endif 1641 1642 /* atomically save the old PTE and zap it */ 1643 opte = pmap_pte_set(&ptes[pl1_i(pve->pv_va)], 0); 1644 1645 if (opte & PG_W) 1646 pve->pv_pmap->pm_stats.wired_count--; 1647 pve->pv_pmap->pm_stats.resident_count--; 1648 1649 pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va); 1650 1651 pmap_sync_flags_pte(pg, opte); 1652 1653 /* update the PTP reference count. free if last reference. */ 1654 if (pve->pv_ptp) { 1655 pve->pv_ptp->wire_count--; 1656 if (pve->pv_ptp->wire_count <= 1) { 1657 pmap_free_ptp(pve->pv_pmap, pve->pv_ptp, 1658 pve->pv_va, ptes, pdes, &empty_ptps); 1659 } 1660 } 1661 pmap_unmap_ptes(pve->pv_pmap); 1662 pool_put(&pmap_pv_pool, pve); 1663 } 1664 1665 PMAP_HEAD_TO_MAP_UNLOCK(); 1666 pmap_tlb_shootwait(); 1667 1668 while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) { 1669 TAILQ_REMOVE(&empty_ptps, ptp, pageq); 1670 uvm_pagefree(ptp); 1671 } 1672 } 1673 1674 /* 1675 * p m a p a t t r i b u t e f u n c t i o n s 1676 * functions that test/change managed page's attributes 1677 * since a page can be mapped multiple times we must check each PTE that 1678 * maps it by going down the pv lists. 1679 */ 1680 1681 /* 1682 * pmap_test_attrs: test a page's attributes 1683 */ 1684 1685 boolean_t 1686 pmap_test_attrs(struct vm_page *pg, unsigned int testbits) 1687 { 1688 struct pv_entry *pve; 1689 pt_entry_t *ptes, pte; 1690 pd_entry_t **pdes; 1691 u_long mybits, testflags; 1692 1693 testflags = pmap_pte2flags(testbits); 1694 1695 if (pg->pg_flags & testflags) 1696 return (TRUE); 1697 1698 PMAP_HEAD_TO_MAP_LOCK(); 1699 mybits = 0; 1700 for (pve = pg->mdpage.pv_list; pve != NULL && mybits == 0; 1701 pve = pve->pv_next) { 1702 pmap_map_ptes(pve->pv_pmap, &ptes, &pdes); 1703 pte = ptes[pl1_i(pve->pv_va)]; 1704 pmap_unmap_ptes(pve->pv_pmap); 1705 mybits |= (pte & testbits); 1706 } 1707 PMAP_HEAD_TO_MAP_UNLOCK(); 1708 1709 if (mybits == 0) 1710 return (FALSE); 1711 1712 atomic_setbits_int(&pg->pg_flags, pmap_pte2flags(mybits)); 1713 1714 return (TRUE); 1715 } 1716 1717 /* 1718 * pmap_clear_attrs: change a page's attributes 1719 * 1720 * => we return TRUE if we cleared one of the bits we were asked to 1721 */ 1722 1723 boolean_t 1724 pmap_clear_attrs(struct vm_page *pg, unsigned long clearbits) 1725 { 1726 struct pv_entry *pve; 1727 pt_entry_t *ptes, opte; 1728 pd_entry_t **pdes; 1729 u_long clearflags; 1730 int result; 1731 1732 clearflags = pmap_pte2flags(clearbits); 1733 1734 PMAP_HEAD_TO_MAP_LOCK(); 1735 1736 result = pg->pg_flags & clearflags; 1737 if (result) 1738 atomic_clearbits_int(&pg->pg_flags, clearflags); 1739 1740 for (pve = pg->mdpage.pv_list; pve != NULL; pve = pve->pv_next) { 1741 pmap_map_ptes(pve->pv_pmap, &ptes, &pdes); 1742 #ifdef DIAGNOSTIC 1743 if (!pmap_pdes_valid(pve->pv_va, pdes, NULL)) 1744 panic("pmap_change_attrs: mapping without PTP " 1745 "detected"); 1746 #endif 1747 1748 opte = ptes[pl1_i(pve->pv_va)]; 1749 if (opte & clearbits) { 1750 result = 1; 1751 pmap_pte_clearbits(&ptes[pl1_i(pve->pv_va)], 1752 (opte & clearbits)); 1753 pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va); 1754 } 1755 pmap_unmap_ptes(pve->pv_pmap); 1756 } 1757 1758 PMAP_HEAD_TO_MAP_UNLOCK(); 1759 1760 pmap_tlb_shootwait(); 1761 1762 return (result != 0); 1763 } 1764 1765 /* 1766 * p m a p p r o t e c t i o n f u n c t i o n s 1767 */ 1768 1769 /* 1770 * pmap_page_protect: change the protection of all recorded mappings 1771 * of a managed page 1772 * 1773 * => NOTE: this is an inline function in pmap.h 1774 */ 1775 1776 /* see pmap.h */ 1777 1778 /* 1779 * pmap_protect: set the protection in of the pages in a pmap 1780 * 1781 * => NOTE: this is an inline function in pmap.h 1782 */ 1783 1784 /* see pmap.h */ 1785 1786 /* 1787 * pmap_write_protect: write-protect pages in a pmap 1788 */ 1789 1790 void 1791 pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot) 1792 { 1793 pt_entry_t nx, *ptes, *spte, *epte; 1794 pd_entry_t **pdes; 1795 vaddr_t blockend; 1796 int shootall = 0; 1797 vaddr_t va; 1798 1799 pmap_map_ptes(pmap, &ptes, &pdes); 1800 1801 /* should be ok, but just in case ... */ 1802 sva &= PG_FRAME; 1803 eva &= PG_FRAME; 1804 1805 nx = 0; 1806 if ((cpu_feature & CPUID_NXE) && !(prot & VM_PROT_EXECUTE)) 1807 nx = PG_NX; 1808 1809 if ((eva - sva > 32 * PAGE_SIZE) && pmap != pmap_kernel()) 1810 shootall = 1; 1811 1812 for (va = sva; va < eva ; va = blockend) { 1813 blockend = (va & L2_FRAME) + NBPD_L2; 1814 if (blockend > eva) 1815 blockend = eva; 1816 1817 /* 1818 * XXXCDC: our PTE mappings should never be write-protected! 1819 * 1820 * long term solution is to move the PTEs out of user 1821 * address space. and into kernel address space (up 1822 * with APTE). then we can set VM_MAXUSER_ADDRESS to 1823 * be VM_MAX_ADDRESS. 1824 */ 1825 1826 /* XXXCDC: ugly hack to avoid freeing PDP here */ 1827 if (pl_i(va, PTP_LEVELS) == PDIR_SLOT_PTE) 1828 continue; 1829 1830 /* empty block? */ 1831 if (!pmap_pdes_valid(va, pdes, NULL)) 1832 continue; 1833 1834 #ifdef DIAGNOSTIC 1835 if (va >= VM_MAXUSER_ADDRESS && va < VM_MAX_ADDRESS) 1836 panic("pmap_write_protect: PTE space"); 1837 #endif 1838 1839 spte = &ptes[pl1_i(va)]; 1840 epte = &ptes[pl1_i(blockend)]; 1841 1842 for (/*null */; spte < epte ; spte++) { 1843 if (!(*spte & PG_V)) 1844 continue; 1845 pmap_pte_clearbits(spte, PG_RW); 1846 pmap_pte_setbits(spte, nx); 1847 } 1848 } 1849 1850 if (shootall) 1851 pmap_tlb_shoottlb(); 1852 else 1853 pmap_tlb_shootrange(pmap, sva, eva); 1854 1855 pmap_tlb_shootwait(); 1856 1857 pmap_unmap_ptes(pmap); 1858 } 1859 1860 /* 1861 * end of protection functions 1862 */ 1863 1864 /* 1865 * pmap_unwire: clear the wired bit in the PTE 1866 * 1867 * => mapping should already be in map 1868 */ 1869 1870 void 1871 pmap_unwire(struct pmap *pmap, vaddr_t va) 1872 { 1873 pt_entry_t *ptes; 1874 pd_entry_t **pdes; 1875 1876 pmap_map_ptes(pmap, &ptes, &pdes); 1877 1878 if (pmap_pdes_valid(va, pdes, NULL)) { 1879 1880 #ifdef DIAGNOSTIC 1881 if (!pmap_valid_entry(ptes[pl1_i(va)])) 1882 panic("pmap_unwire: invalid (unmapped) va 0x%lx", va); 1883 #endif 1884 if ((ptes[pl1_i(va)] & PG_W) != 0) { 1885 pmap_pte_clearbits(&ptes[pl1_i(va)], PG_W); 1886 pmap->pm_stats.wired_count--; 1887 } 1888 #ifdef DIAGNOSTIC 1889 else { 1890 printf("pmap_unwire: wiring for pmap %p va 0x%lx " 1891 "didn't change!\n", pmap, va); 1892 } 1893 #endif 1894 pmap_unmap_ptes(pmap); 1895 } 1896 #ifdef DIAGNOSTIC 1897 else { 1898 panic("pmap_unwire: invalid PDE"); 1899 } 1900 #endif 1901 } 1902 1903 /* 1904 * pmap_collect: free resources held by a pmap 1905 * 1906 * => optional function. 1907 * => called when a process is swapped out to free memory. 1908 */ 1909 1910 void 1911 pmap_collect(struct pmap *pmap) 1912 { 1913 /* 1914 * free all of the pt pages by removing the physical mappings 1915 * for its entire address space. 1916 */ 1917 1918 /* pmap_do_remove(pmap, VM_MIN_ADDRESS, VM_MAX_ADDRESS, 1919 PMAP_REMOVE_SKIPWIRED); 1920 */ 1921 } 1922 1923 /* 1924 * pmap_copy: copy mappings from one pmap to another 1925 * 1926 * => optional function 1927 * void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) 1928 */ 1929 1930 /* 1931 * defined as macro in pmap.h 1932 */ 1933 1934 /* 1935 * pmap_enter: enter a mapping into a pmap 1936 * 1937 * => must be done "now" ... no lazy-evaluation 1938 */ 1939 1940 int 1941 pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags) 1942 { 1943 pt_entry_t *ptes, opte, npte; 1944 pd_entry_t **pdes; 1945 struct vm_page *ptp, *pg = NULL; 1946 struct pv_entry *pve = NULL; 1947 int ptpdelta, wireddelta, resdelta; 1948 boolean_t wired = (flags & PMAP_WIRED) != 0; 1949 boolean_t nocache = (pa & PMAP_NOCACHE) != 0; 1950 boolean_t wc = (pa & PMAP_WC) != 0; 1951 int error; 1952 1953 KASSERT(!(wc && nocache)); 1954 pa &= PMAP_PA_MASK; 1955 1956 #ifdef DIAGNOSTIC 1957 if (va == (vaddr_t) PDP_BASE || va == (vaddr_t) APDP_BASE) 1958 panic("pmap_enter: trying to map over PDP/APDP!"); 1959 1960 /* sanity check: kernel PTPs should already have been pre-allocated */ 1961 if (va >= VM_MIN_KERNEL_ADDRESS && 1962 !pmap_valid_entry(pmap->pm_pdir[pl_i(va, PTP_LEVELS)])) 1963 panic("pmap_enter: missing kernel PTP for va %lx!", va); 1964 1965 #endif 1966 1967 /* get lock */ 1968 PMAP_MAP_TO_HEAD_LOCK(); 1969 1970 /* 1971 * map in ptes and get a pointer to our PTP (unless we are the kernel) 1972 */ 1973 1974 pmap_map_ptes(pmap, &ptes, &pdes); 1975 if (pmap == pmap_kernel()) { 1976 ptp = NULL; 1977 } else { 1978 ptp = pmap_get_ptp(pmap, va, pdes); 1979 if (ptp == NULL) { 1980 if (flags & PMAP_CANFAIL) { 1981 error = ENOMEM; 1982 goto out; 1983 } 1984 panic("pmap_enter: get ptp failed"); 1985 } 1986 } 1987 opte = ptes[pl1_i(va)]; /* old PTE */ 1988 1989 /* 1990 * is there currently a valid mapping at our VA? 1991 */ 1992 1993 if (pmap_valid_entry(opte)) { 1994 /* 1995 * first, calculate pm_stats updates. resident count will not 1996 * change since we are replacing/changing a valid mapping. 1997 * wired count might change... 1998 */ 1999 2000 resdelta = 0; 2001 if (wired && (opte & PG_W) == 0) 2002 wireddelta = 1; 2003 else if (!wired && (opte & PG_W) != 0) 2004 wireddelta = -1; 2005 else 2006 wireddelta = 0; 2007 ptpdelta = 0; 2008 2009 /* 2010 * is the currently mapped PA the same as the one we 2011 * want to map? 2012 */ 2013 2014 if ((opte & PG_FRAME) == pa) { 2015 2016 /* if this is on the PVLIST, sync R/M bit */ 2017 if (opte & PG_PVLIST) { 2018 pg = PHYS_TO_VM_PAGE(pa); 2019 #ifdef DIAGNOSTIC 2020 if (pg == NULL) 2021 panic("pmap_enter: same pa PG_PVLIST " 2022 "mapping with unmanaged page " 2023 "pa = 0x%lx (0x%lx)", pa, 2024 atop(pa)); 2025 #endif 2026 pmap_sync_flags_pte(pg, opte); 2027 } else { 2028 #ifdef DIAGNOSTIC 2029 if (PHYS_TO_VM_PAGE(pa) != NULL) 2030 panic("pmap_enter: same pa, managed " 2031 "page, no PG_VLIST pa: 0x%lx\n", 2032 pa); 2033 #endif 2034 } 2035 goto enter_now; 2036 } 2037 2038 /* 2039 * changing PAs: we must remove the old one first 2040 */ 2041 2042 /* 2043 * if current mapping is on a pvlist, 2044 * remove it (sync R/M bits) 2045 */ 2046 2047 if (opte & PG_PVLIST) { 2048 pg = PHYS_TO_VM_PAGE(opte & PG_FRAME); 2049 #ifdef DIAGNOSTIC 2050 if (pg == NULL) 2051 panic("pmap_enter: PG_PVLIST mapping with " 2052 "unmanaged page " 2053 "pa = 0x%lx (0x%lx)", pa, atop(pa)); 2054 #endif 2055 pmap_sync_flags_pte(pg, opte); 2056 pve = pmap_remove_pv(pg, pmap, va); 2057 pg = NULL; /* This is not the page we are looking for */ 2058 } 2059 } else { /* opte not valid */ 2060 pve = NULL; 2061 resdelta = 1; 2062 if (wired) 2063 wireddelta = 1; 2064 else 2065 wireddelta = 0; 2066 if (ptp) 2067 ptpdelta = 1; 2068 else 2069 ptpdelta = 0; 2070 } 2071 2072 /* 2073 * pve is either NULL or points to a now-free pv_entry structure 2074 * (the latter case is if we called pmap_remove_pv above). 2075 * 2076 * if this entry is to be on a pvlist, enter it now. 2077 */ 2078 2079 if (pmap_initialized) 2080 pg = PHYS_TO_VM_PAGE(pa); 2081 2082 if (pg != NULL) { 2083 if (pve == NULL) { 2084 pve = pool_get(&pmap_pv_pool, PR_NOWAIT); 2085 if (pve == NULL) { 2086 if (flags & PMAP_CANFAIL) { 2087 error = ENOMEM; 2088 goto out; 2089 } 2090 panic("pmap_enter: no pv entries available"); 2091 } 2092 } 2093 pmap_enter_pv(pg, pve, pmap, va, ptp); 2094 } else { 2095 /* new mapping is not PG_PVLIST. free pve if we've got one */ 2096 if (pve) 2097 pool_put(&pmap_pv_pool, pve); 2098 } 2099 2100 enter_now: 2101 /* 2102 * at this point pg is !NULL if we want the PG_PVLIST bit set 2103 */ 2104 2105 pmap->pm_stats.resident_count += resdelta; 2106 pmap->pm_stats.wired_count += wireddelta; 2107 if (ptp) 2108 ptp->wire_count += ptpdelta; 2109 2110 if (pg != PHYS_TO_VM_PAGE(pa)) 2111 panic("wtf?"); 2112 2113 npte = pa | protection_codes[prot] | PG_V; 2114 if (pg != NULL) { 2115 npte |= PG_PVLIST; 2116 /* 2117 * make sure that if the page is write combined all 2118 * instances of pmap_enter make it so. 2119 */ 2120 if (pg->pg_flags & PG_PMAP_WC) { 2121 KASSERT(nocache == 0); 2122 wc = TRUE; 2123 } 2124 } 2125 if (wc) 2126 npte |= pmap_pg_wc; 2127 if (wired) 2128 npte |= PG_W; 2129 if (nocache) 2130 npte |= PG_N; 2131 if (va < VM_MAXUSER_ADDRESS) 2132 npte |= PG_u; 2133 else if (va < VM_MAX_ADDRESS) 2134 npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? */ 2135 if (pmap == pmap_kernel()) 2136 npte |= pmap_pg_g; 2137 2138 ptes[pl1_i(va)] = npte; /* zap! */ 2139 2140 /* 2141 * If we changed anything other than modified/used bits, 2142 * flush the TLB. (is this overkill?) 2143 */ 2144 if (opte & PG_V) { 2145 if (nocache && (opte & PG_N) == 0) 2146 wbinvd(); 2147 pmap_tlb_shootpage(pmap, va); 2148 pmap_tlb_shootwait(); 2149 } 2150 2151 error = 0; 2152 2153 out: 2154 pmap_unmap_ptes(pmap); 2155 PMAP_MAP_TO_HEAD_UNLOCK(); 2156 2157 return error; 2158 } 2159 2160 boolean_t 2161 pmap_get_physpage(vaddr_t va, int level, paddr_t *paddrp) 2162 { 2163 struct vm_page *ptp; 2164 struct pmap *kpm = pmap_kernel(); 2165 2166 if (uvm.page_init_done == FALSE) { 2167 vaddr_t va; 2168 2169 /* 2170 * we're growing the kernel pmap early (from 2171 * uvm_pageboot_alloc()). this case must be 2172 * handled a little differently. 2173 */ 2174 2175 va = pmap_steal_memory(PAGE_SIZE, NULL, NULL); 2176 *paddrp = PMAP_DIRECT_UNMAP(va); 2177 } else { 2178 ptp = uvm_pagealloc(&kpm->pm_obj[level - 1], 2179 ptp_va2o(va, level), NULL, 2180 UVM_PGA_USERESERVE|UVM_PGA_ZERO); 2181 if (ptp == NULL) 2182 panic("pmap_get_physpage: out of memory"); 2183 atomic_clearbits_int(&ptp->pg_flags, PG_BUSY); 2184 ptp->wire_count = 1; 2185 *paddrp = VM_PAGE_TO_PHYS(ptp); 2186 } 2187 kpm->pm_stats.resident_count++; 2188 return TRUE; 2189 } 2190 2191 /* 2192 * Allocate the amount of specified ptps for a ptp level, and populate 2193 * all levels below accordingly, mapping virtual addresses starting at 2194 * kva. 2195 * 2196 * Used by pmap_growkernel. 2197 */ 2198 void 2199 pmap_alloc_level(pd_entry_t **pdes, vaddr_t kva, int lvl, long *needed_ptps) 2200 { 2201 unsigned long i; 2202 vaddr_t va; 2203 paddr_t pa; 2204 unsigned long index, endindex; 2205 int level; 2206 pd_entry_t *pdep; 2207 2208 for (level = lvl; level > 1; level--) { 2209 if (level == PTP_LEVELS) 2210 pdep = pmap_kernel()->pm_pdir; 2211 else 2212 pdep = pdes[level - 2]; 2213 va = kva; 2214 index = pl_i(kva, level); 2215 endindex = index + needed_ptps[level - 1]; 2216 /* 2217 * XXX special case for first time call. 2218 */ 2219 if (nkptp[level - 1] != 0) 2220 index++; 2221 else 2222 endindex--; 2223 2224 for (i = index; i <= endindex; i++) { 2225 pmap_get_physpage(va, level - 1, &pa); 2226 pdep[i] = pa | PG_RW | PG_V; 2227 nkptp[level - 1]++; 2228 va += nbpd[level - 1]; 2229 } 2230 } 2231 } 2232 2233 /* 2234 * pmap_growkernel: increase usage of KVM space 2235 * 2236 * => we allocate new PTPs for the kernel and install them in all 2237 * the pmaps on the system. 2238 */ 2239 2240 static vaddr_t pmap_maxkvaddr = VM_MIN_KERNEL_ADDRESS; 2241 2242 vaddr_t 2243 pmap_growkernel(vaddr_t maxkvaddr) 2244 { 2245 struct pmap *kpm = pmap_kernel(), *pm; 2246 int s, i; 2247 unsigned newpdes; 2248 long needed_kptp[PTP_LEVELS], target_nptp, old; 2249 2250 if (maxkvaddr <= pmap_maxkvaddr) 2251 return pmap_maxkvaddr; 2252 2253 maxkvaddr = x86_round_pdr(maxkvaddr); 2254 old = nkptp[PTP_LEVELS - 1]; 2255 /* 2256 * This loop could be optimized more, but pmap_growkernel() 2257 * is called infrequently. 2258 */ 2259 for (i = PTP_LEVELS - 1; i >= 1; i--) { 2260 target_nptp = pl_i(maxkvaddr, i + 1) - 2261 pl_i(VM_MIN_KERNEL_ADDRESS, i + 1); 2262 /* 2263 * XXX only need to check toplevel. 2264 */ 2265 if (target_nptp > nkptpmax[i]) 2266 panic("out of KVA space"); 2267 needed_kptp[i] = target_nptp - nkptp[i] + 1; 2268 } 2269 2270 2271 s = splhigh(); /* to be safe */ 2272 pmap_alloc_level(normal_pdes, pmap_maxkvaddr, PTP_LEVELS, 2273 needed_kptp); 2274 2275 /* 2276 * If the number of top level entries changed, update all 2277 * pmaps. 2278 */ 2279 if (needed_kptp[PTP_LEVELS - 1] != 0) { 2280 newpdes = nkptp[PTP_LEVELS - 1] - old; 2281 LIST_FOREACH(pm, &pmaps, pm_list) { 2282 memcpy(&pm->pm_pdir[PDIR_SLOT_KERN + old], 2283 &kpm->pm_pdir[PDIR_SLOT_KERN + old], 2284 newpdes * sizeof (pd_entry_t)); 2285 } 2286 2287 /* Invalidate the PDP cache. */ 2288 #if 0 2289 pool_cache_invalidate(&pmap_pdp_cache); 2290 #endif 2291 pmap_pdp_cache_generation++; 2292 } 2293 pmap_maxkvaddr = maxkvaddr; 2294 splx(s); 2295 2296 return maxkvaddr; 2297 } 2298 2299 vaddr_t 2300 pmap_steal_memory(vsize_t size, vaddr_t *start, vaddr_t *end) 2301 { 2302 int segno; 2303 u_int npg; 2304 vaddr_t va; 2305 paddr_t pa; 2306 struct vm_physseg *seg; 2307 2308 size = round_page(size); 2309 npg = atop(size); 2310 2311 for (segno = 0, seg = vm_physmem; segno < vm_nphysseg; segno++, seg++) { 2312 if (seg->avail_end - seg->avail_start < npg) 2313 continue; 2314 /* 2315 * We can only steal at an ``unused'' segment boundary, 2316 * i.e. either at the start or at the end. 2317 */ 2318 if (seg->avail_start == seg->start || 2319 seg->avail_end == seg->end) 2320 break; 2321 } 2322 if (segno == vm_nphysseg) { 2323 panic("pmap_steal_memory: out of memory"); 2324 } else { 2325 if (seg->avail_start == seg->start) { 2326 pa = ptoa(seg->avail_start); 2327 seg->avail_start += npg; 2328 seg->start += npg; 2329 } else { 2330 pa = ptoa(seg->avail_end) - size; 2331 seg->avail_end -= npg; 2332 seg->end -= npg; 2333 } 2334 /* 2335 * If all the segment has been consumed now, remove it. 2336 * Note that the crash dump code still knows about it 2337 * and will dump it correctly. 2338 */ 2339 if (seg->start == seg->end) { 2340 if (vm_nphysseg-- == 1) 2341 panic("pmap_steal_memory: out of memory"); 2342 while (segno < vm_nphysseg) { 2343 seg[0] = seg[1]; /* struct copy */ 2344 seg++; 2345 segno++; 2346 } 2347 } 2348 2349 va = PMAP_DIRECT_MAP(pa); 2350 memset((void *)va, 0, size); 2351 } 2352 2353 if (start != NULL) 2354 *start = virtual_avail; 2355 if (end != NULL) 2356 *end = VM_MAX_KERNEL_ADDRESS; 2357 2358 return (va); 2359 } 2360 2361 #ifdef DEBUG 2362 void pmap_dump(struct pmap *, vaddr_t, vaddr_t); 2363 2364 /* 2365 * pmap_dump: dump all the mappings from a pmap 2366 * 2367 * => caller should not be holding any pmap locks 2368 */ 2369 2370 void 2371 pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva) 2372 { 2373 pt_entry_t *ptes, *pte; 2374 pd_entry_t **pdes; 2375 vaddr_t blkendva; 2376 2377 /* 2378 * if end is out of range truncate. 2379 * if (end == start) update to max. 2380 */ 2381 2382 if (eva > VM_MAXUSER_ADDRESS || eva <= sva) 2383 eva = VM_MAXUSER_ADDRESS; 2384 2385 2386 PMAP_MAP_TO_HEAD_LOCK(); 2387 pmap_map_ptes(pmap, &ptes, &pdes); 2388 2389 /* 2390 * dumping a range of pages: we dump in PTP sized blocks (4MB) 2391 */ 2392 2393 for (/* null */ ; sva < eva ; sva = blkendva) { 2394 2395 /* determine range of block */ 2396 blkendva = x86_round_pdr(sva+1); 2397 if (blkendva > eva) 2398 blkendva = eva; 2399 2400 /* valid block? */ 2401 if (!pmap_pdes_valid(sva, pdes, NULL)) 2402 continue; 2403 2404 pte = &ptes[pl1_i(sva)]; 2405 for (/* null */; sva < blkendva ; sva += PAGE_SIZE, pte++) { 2406 if (!pmap_valid_entry(*pte)) 2407 continue; 2408 printf("va %#lx -> pa %#lx (pte=%#lx)\n", 2409 sva, *pte, *pte & PG_FRAME); 2410 } 2411 } 2412 pmap_unmap_ptes(pmap); 2413 PMAP_MAP_TO_HEAD_UNLOCK(); 2414 } 2415 #endif 2416 2417 void 2418 pmap_virtual_space(vaddr_t *vstartp, vaddr_t *vendp) 2419 { 2420 *vstartp = virtual_avail; 2421 *vendp = VM_MAX_KERNEL_ADDRESS; 2422 } 2423 2424 #ifdef MULTIPROCESSOR 2425 /* 2426 * Locking for tlb shootdown. 2427 * 2428 * We lock by setting tlb_shoot_wait to the number of cpus that will 2429 * receive our tlb shootdown. After sending the IPIs, we don't need to 2430 * worry about locking order or interrupts spinning for the lock because 2431 * the call that grabs the "lock" isn't the one that releases it. And 2432 * there is nothing that can block the IPI that releases the lock. 2433 * 2434 * The functions are organized so that we first count the number of 2435 * cpus we need to send the IPI to, then we grab the counter, then 2436 * we send the IPIs, then we finally do our own shootdown. 2437 * 2438 * Our shootdown is last to make it parallel with the other cpus 2439 * to shorten the spin time. 2440 * 2441 * Notice that we depend on failures to send IPIs only being able to 2442 * happen during boot. If they happen later, the above assumption 2443 * doesn't hold since we can end up in situations where noone will 2444 * release the lock if we get an interrupt in a bad moment. 2445 */ 2446 2447 volatile long tlb_shoot_wait; 2448 2449 volatile vaddr_t tlb_shoot_addr1; 2450 volatile vaddr_t tlb_shoot_addr2; 2451 2452 void 2453 pmap_tlb_shootpage(struct pmap *pm, vaddr_t va) 2454 { 2455 struct cpu_info *ci, *self = curcpu(); 2456 CPU_INFO_ITERATOR cii; 2457 long wait = 0; 2458 u_int64_t mask = 0; 2459 2460 CPU_INFO_FOREACH(cii, ci) { 2461 if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) || 2462 !(ci->ci_flags & CPUF_RUNNING)) 2463 continue; 2464 mask |= (1ULL << ci->ci_cpuid); 2465 wait++; 2466 } 2467 2468 if (wait > 0) { 2469 int s = splvm(); 2470 2471 while (x86_atomic_cas_ul(&tlb_shoot_wait, 0, wait) != 0) { 2472 while (tlb_shoot_wait != 0) 2473 SPINLOCK_SPIN_HOOK; 2474 } 2475 tlb_shoot_addr1 = va; 2476 CPU_INFO_FOREACH(cii, ci) { 2477 if ((mask & (1ULL << ci->ci_cpuid)) == 0) 2478 continue; 2479 if (x86_fast_ipi(ci, LAPIC_IPI_INVLPG) != 0) 2480 panic("pmap_tlb_shootpage: ipi failed"); 2481 } 2482 splx(s); 2483 } 2484 2485 if (pmap_is_curpmap(pm)) 2486 pmap_update_pg(va); 2487 } 2488 2489 void 2490 pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva) 2491 { 2492 struct cpu_info *ci, *self = curcpu(); 2493 CPU_INFO_ITERATOR cii; 2494 long wait = 0; 2495 u_int64_t mask = 0; 2496 vaddr_t va; 2497 2498 CPU_INFO_FOREACH(cii, ci) { 2499 if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) || 2500 !(ci->ci_flags & CPUF_RUNNING)) 2501 continue; 2502 mask |= (1ULL << ci->ci_cpuid); 2503 wait++; 2504 } 2505 2506 if (wait > 0) { 2507 int s = splvm(); 2508 2509 while (x86_atomic_cas_ul(&tlb_shoot_wait, 0, wait) != 0) { 2510 while (tlb_shoot_wait != 0) 2511 SPINLOCK_SPIN_HOOK; 2512 } 2513 tlb_shoot_addr1 = sva; 2514 tlb_shoot_addr2 = eva; 2515 CPU_INFO_FOREACH(cii, ci) { 2516 if ((mask & (1ULL << ci->ci_cpuid)) == 0) 2517 continue; 2518 if (x86_fast_ipi(ci, LAPIC_IPI_INVLRANGE) != 0) 2519 panic("pmap_tlb_shootrange: ipi failed"); 2520 } 2521 splx(s); 2522 } 2523 2524 if (pmap_is_curpmap(pm)) 2525 for (va = sva; va < eva; va += PAGE_SIZE) 2526 pmap_update_pg(va); 2527 } 2528 2529 void 2530 pmap_tlb_shoottlb(void) 2531 { 2532 struct cpu_info *ci, *self = curcpu(); 2533 CPU_INFO_ITERATOR cii; 2534 long wait = 0; 2535 u_int64_t mask = 0; 2536 2537 CPU_INFO_FOREACH(cii, ci) { 2538 if (ci == self || !(ci->ci_flags & CPUF_RUNNING)) 2539 continue; 2540 mask |= (1ULL << ci->ci_cpuid); 2541 wait++; 2542 } 2543 2544 if (wait) { 2545 int s = splvm(); 2546 2547 while (x86_atomic_cas_ul(&tlb_shoot_wait, 0, wait) != 0) { 2548 while (tlb_shoot_wait != 0) 2549 SPINLOCK_SPIN_HOOK; 2550 } 2551 2552 CPU_INFO_FOREACH(cii, ci) { 2553 if ((mask & (1ULL << ci->ci_cpuid)) == 0) 2554 continue; 2555 if (x86_fast_ipi(ci, LAPIC_IPI_INVLTLB) != 0) 2556 panic("pmap_tlb_shoottlb: ipi failed"); 2557 } 2558 splx(s); 2559 } 2560 2561 tlbflush(); 2562 } 2563 2564 void 2565 pmap_tlb_shootwait(void) 2566 { 2567 while (tlb_shoot_wait != 0) 2568 SPINLOCK_SPIN_HOOK; 2569 } 2570 2571 #else 2572 2573 void 2574 pmap_tlb_shootpage(struct pmap *pm, vaddr_t va) 2575 { 2576 if (pmap_is_curpmap(pm)) 2577 pmap_update_pg(va); 2578 2579 } 2580 2581 void 2582 pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva) 2583 { 2584 vaddr_t va; 2585 2586 for (va = sva; va < eva; va += PAGE_SIZE) 2587 pmap_update_pg(va); 2588 2589 } 2590 2591 void 2592 pmap_tlb_shoottlb(void) 2593 { 2594 tlbflush(); 2595 } 2596 #endif /* MULTIPROCESSOR */ 2597