1 /* $OpenBSD: pmap.c,v 1.68 2014/03/07 16:56:57 guenther Exp $ */ 2 /* $NetBSD: pmap.c,v 1.3 2003/05/08 18:13:13 thorpej Exp $ */ 3 4 /* 5 * 6 * Copyright (c) 1997 Charles D. Cranor and Washington University. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by Charles D. Cranor and 20 * Washington University. 21 * 4. The name of the author may not be used to endorse or promote products 22 * derived from this software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 29 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 30 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 31 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 32 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 33 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /* 37 * Copyright 2001 (c) Wasabi Systems, Inc. 38 * All rights reserved. 39 * 40 * Written by Frank van der Linden for Wasabi Systems, Inc. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. All advertising materials mentioning features or use of this software 51 * must display the following acknowledgement: 52 * This product includes software developed for the NetBSD Project by 53 * Wasabi Systems, Inc. 54 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 55 * or promote products derived from this software without specific prior 56 * written permission. 57 * 58 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 60 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 61 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL WASABI SYSTEMS, INC 62 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 63 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 64 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 65 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 66 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 67 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 68 * POSSIBILITY OF SUCH DAMAGE. 69 */ 70 71 /* 72 * This is the i386 pmap modified and generalized to support x86-64 73 * as well. The idea is to hide the upper N levels of the page tables 74 * inside pmap_get_ptp, pmap_free_ptp and pmap_growkernel. The rest 75 * is mostly untouched, except that it uses some more generalized 76 * macros and interfaces. 77 * 78 * This pmap has been tested on the i386 as well, and it can be easily 79 * adapted to PAE. 80 * 81 * fvdl@wasabisystems.com 18-Jun-2001 82 */ 83 84 /* 85 * pmap.c: i386 pmap module rewrite 86 * Chuck Cranor <chuck@ccrc.wustl.edu> 87 * 11-Aug-97 88 * 89 * history of this pmap module: in addition to my own input, i used 90 * the following references for this rewrite of the i386 pmap: 91 * 92 * [1] the NetBSD i386 pmap. this pmap appears to be based on the 93 * BSD hp300 pmap done by Mike Hibler at University of Utah. 94 * it was then ported to the i386 by William Jolitz of UUNET 95 * Technologies, Inc. Then Charles M. Hannum of the NetBSD 96 * project fixed some bugs and provided some speed ups. 97 * 98 * [2] the FreeBSD i386 pmap. this pmap seems to be the 99 * Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson 100 * and David Greenman. 101 * 102 * [3] the Mach pmap. this pmap, from CMU, seems to have migrated 103 * between several processors. the VAX version was done by 104 * Avadis Tevanian, Jr., and Michael Wayne Young. the i386 105 * version was done by Lance Berc, Mike Kupfer, Bob Baron, 106 * David Golub, and Richard Draves. the alpha version was 107 * done by Alessandro Forin (CMU/Mach) and Chris Demetriou 108 * (NetBSD/alpha). 109 */ 110 111 #include <sys/param.h> 112 #include <sys/systm.h> 113 #include <sys/proc.h> 114 #include <sys/malloc.h> 115 #include <sys/pool.h> 116 #include <sys/user.h> 117 #include <sys/kernel.h> 118 #include <sys/mutex.h> 119 #include <sys/sched.h> 120 121 #include <uvm/uvm.h> 122 123 #include <machine/atomic.h> 124 #include <machine/lock.h> 125 #include <machine/cpu.h> 126 #include <machine/specialreg.h> 127 128 #include <dev/isa/isareg.h> 129 #include <machine/isa_machdep.h> 130 131 /* 132 * general info: 133 * 134 * - for an explanation of how the i386 MMU hardware works see 135 * the comments in <machine/pte.h>. 136 * 137 * - for an explanation of the general memory structure used by 138 * this pmap (including the recursive mapping), see the comments 139 * in <machine/pmap.h>. 140 * 141 * this file contains the code for the "pmap module." the module's 142 * job is to manage the hardware's virtual to physical address mappings. 143 * note that there are two levels of mapping in the VM system: 144 * 145 * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's 146 * to map ranges of virtual address space to objects/files. for 147 * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only 148 * to the file /bin/ls starting at offset zero." note that 149 * the upper layer mapping is not concerned with how individual 150 * vm_pages are mapped. 
 *
 * [2] the lower layer of the VM system (the pmap) maintains the mappings
 *     from virtual addresses.  it is concerned with which vm_page is
 *     mapped where.  for example, when you run /bin/ls and start
 *     at page 0x1000 the fault routine may lookup the correct page
 *     of the /bin/ls file and then ask the pmap layer to establish
 *     a mapping for it.
 *
 * note that information in the lower layer of the VM system can be
 * thrown away since it can easily be reconstructed from the info
 * in the upper layer.
 *
 * data structures we use include:
 *  - struct pmap: describes the address space of one process
 *  - struct pv_entry: describes one <PMAP,VA> mapping of a PA
 *  - struct pg_to_free: a list of pages (PTPs) whose mappings have been
 *	removed; they are queued here and freed once the TLB shootdown
 *	for them has completed.
 */

/*
 * memory allocation
 *
 *  - there are three data structures that we must dynamically allocate:
 *
 * [A] new process' page directory page (PDP)
 *	- plan 1: done at pmap_create() we use
 *	  uvm_km_alloc(kernel_map, PAGE_SIZE)  [fka kmem_alloc] to do this
 *	  allocation.
 *
 * if we are low in free physical memory then we sleep in
 * uvm_km_alloc -- in this case this is ok since we are creating
 * a new pmap and should not be holding any locks.
 *
 * if the kernel is totally out of virtual space
 * (i.e. uvm_km_alloc returns NULL), then we panic.
 *
 * XXX: the fork code currently has no way to return an "out of
 * memory, try again" error code since uvm_fork [fka vm_fork]
 * is a void function.
 *
 * [B] new page table pages (PTP)
 *	call uvm_pagealloc()
 *		=> success: zero page, add to pm_pdir
 *		=> failure: we are out of free vm_pages, let pmap_enter()
 *		   tell UVM about it.
 *
 * note: for kernel PTPs, we start with NKPTP of them.  as we map
 * kernel memory (at uvm_map time) we check to see if we've grown
 * the kernel pmap.  if so, we call the optional function
 * pmap_growkernel() to grow the kernel PTPs in advance.
 *
 * [C] pv_entry structures
 *	- try to allocate one from the pool.
 *	  If we fail, we simply let pmap_enter() tell UVM about it.
 */

vaddr_t ptp_masks[] = PTP_MASK_INITIALIZER;
int ptp_shifts[] = PTP_SHIFT_INITIALIZER;
long nkptp[] = NKPTP_INITIALIZER;
long nkptpmax[] = NKPTPMAX_INITIALIZER;
long nbpd[] = NBPD_INITIALIZER;
pd_entry_t *normal_pdes[] = PDES_INITIALIZER;
pd_entry_t *alternate_pdes[] = APDES_INITIALIZER;

/* int nkpde = NKPTP; */

#define PMAP_MAP_TO_HEAD_LOCK()		/* null */
#define PMAP_MAP_TO_HEAD_UNLOCK()	/* null */

#define PMAP_HEAD_TO_MAP_LOCK()		/* null */
#define PMAP_HEAD_TO_MAP_UNLOCK()	/* null */

#define COUNT(x)	/* nothing */

/*
 * global data structures
 */

struct pmap kernel_pmap_store;	/* the kernel's pmap (proc0) */

/*
 * pmap_pg_wc: if our processor supports PAT then we set this
 * to be the pte bits for Write Combining.  Else we fall back to
 * UC- so mtrrs can override the cacheability.
 */
int pmap_pg_wc = PG_UCMINUS;

/*
 * other data structures
 */

pt_entry_t protection_codes[8];		/* maps MI prot to i386 prot code */
boolean_t pmap_initialized = FALSE;	/* pmap_init done yet? */

/*
 * pv management structures.
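 *
 * each managed page (vm_page) keeps a singly linked list of pv_entry
 * structures in pg->mdpage.pv_list, one per <pmap,va> mapping of that
 * page.  an illustrative walk over all mappings of a page (this is the
 * pattern pmap_test_attrs() and pmap_clear_attrs() use below):
 *
 *	for (pve = pg->mdpage.pv_list; pve != NULL; pve = pve->pv_next)
 *		... look at pve->pv_pmap / pve->pv_va / pve->pv_ptp ...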
 */
struct pool pmap_pv_pool;

/*
 * linked list of all non-kernel pmaps
 */

struct pmap_head pmaps;

/*
 * pool that pmap structures are allocated from
 */

struct pool pmap_pmap_pool;

/*
 * When we're freeing a ptp, we need to delay the freeing until all
 * tlb shootdown has been done.  This is the list of the to-be-freed pages.
 */
TAILQ_HEAD(pg_to_free, vm_page);

/*
 * pool that PDPs are allocated from
 */

struct pool pmap_pdp_pool;
void pmap_pdp_ctor(pd_entry_t *);

extern vaddr_t msgbuf_vaddr;
extern paddr_t msgbuf_paddr;

extern vaddr_t idt_vaddr;		/* we allocate IDT early */
extern paddr_t idt_paddr;

extern vaddr_t lo32_vaddr;
extern vaddr_t lo32_paddr;

vaddr_t virtual_avail;
extern int end;

/*
 * local prototypes
 */

void pmap_enter_pv(struct vm_page *, struct pv_entry *, struct pmap *,
    vaddr_t, struct vm_page *);
struct vm_page *pmap_get_ptp(struct pmap *, vaddr_t, pd_entry_t **);
struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, paddr_t, int);
void pmap_free_ptp(struct pmap *, struct vm_page *,
    vaddr_t, pt_entry_t *, pd_entry_t **, struct pg_to_free *);
void pmap_freepage(struct pmap *, struct vm_page *, int, struct pg_to_free *);
static boolean_t pmap_is_active(struct pmap *, int);
void pmap_map_ptes(struct pmap *, pt_entry_t **, pd_entry_t ***);
struct pv_entry *pmap_remove_pv(struct vm_page *, struct pmap *, vaddr_t);
void pmap_do_remove(struct pmap *, vaddr_t, vaddr_t, int);
boolean_t pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *,
    vaddr_t, int);
void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t,
    vaddr_t, vaddr_t, int);
#define PMAP_REMOVE_ALL		0	/* remove all mappings */
#define PMAP_REMOVE_SKIPWIRED	1	/* skip wired mappings */

void pmap_unmap_ptes(struct pmap *);
boolean_t pmap_get_physpage(vaddr_t, int, paddr_t *);
boolean_t pmap_pdes_valid(vaddr_t, pd_entry_t **, pd_entry_t *);
void pmap_alloc_level(pd_entry_t **, vaddr_t, int, long *);
void pmap_apte_flush(struct pmap *pmap);

void pmap_sync_flags_pte(struct vm_page *, u_long);

/*
 * p m a p   i n l i n e   h e l p e r   f u n c t i o n s
 */

/*
 * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]?
 *		of course the kernel is always loaded
 */

static __inline boolean_t
pmap_is_curpmap(struct pmap *pmap)
{
	return((pmap == pmap_kernel()) ||
	    (pmap->pm_pdirpa == (paddr_t) rcr3()));
}

/*
 * pmap_is_active: is this pmap loaded into the specified processor's %cr3?
 */

static __inline boolean_t
pmap_is_active(struct pmap *pmap, int cpu_id)
{
	return (pmap == pmap_kernel() ||
	    (pmap->pm_cpus & (1ULL << cpu_id)) != 0);
}

static __inline u_int
pmap_pte2flags(u_long pte)
{
	return (((pte & PG_U) ? PG_PMAP_REF : 0) |
	    ((pte & PG_M) ?
PG_PMAP_MOD : 0)); 349 } 350 351 void 352 pmap_sync_flags_pte(struct vm_page *pg, u_long pte) 353 { 354 if (pte & (PG_U|PG_M)) { 355 atomic_setbits_int(&pg->pg_flags, pmap_pte2flags(pte)); 356 } 357 } 358 359 void 360 pmap_apte_flush(struct pmap *pmap) 361 { 362 pmap_tlb_shoottlb(); 363 pmap_tlb_shootwait(); 364 } 365 366 /* 367 * pmap_map_ptes: map a pmap's PTEs into KVM 368 * 369 * => we lock enough pmaps to keep things locked in 370 * => must be undone with pmap_unmap_ptes before returning 371 */ 372 373 void 374 pmap_map_ptes(struct pmap *pmap, pt_entry_t **ptepp, pd_entry_t ***pdeppp) 375 { 376 pd_entry_t opde, npde; 377 378 /* if curpmap then we are always mapped */ 379 if (pmap_is_curpmap(pmap)) { 380 *ptepp = PTE_BASE; 381 *pdeppp = normal_pdes; 382 return; 383 } 384 385 /* need to load a new alternate pt space into curpmap? */ 386 opde = *APDP_PDE; 387 if (!pmap_valid_entry(opde) || (opde & PG_FRAME) != pmap->pm_pdirpa) { 388 npde = (pd_entry_t) (pmap->pm_pdirpa | PG_RW | PG_V); 389 *APDP_PDE = npde; 390 if (pmap_valid_entry(opde)) 391 pmap_apte_flush(curpcb->pcb_pmap); 392 } 393 *ptepp = APTE_BASE; 394 *pdeppp = alternate_pdes; 395 } 396 397 void 398 pmap_unmap_ptes(struct pmap *pmap) 399 { 400 if (pmap_is_curpmap(pmap)) 401 return; 402 403 #if defined(MULTIPROCESSOR) 404 *APDP_PDE = 0; 405 pmap_apte_flush(curpcb->pcb_pmap); 406 #endif 407 COUNT(apdp_pde_unmap); 408 } 409 410 /* 411 * p m a p k e n t e r f u n c t i o n s 412 * 413 * functions to quickly enter/remove pages from the kernel address 414 * space. pmap_kremove is exported to MI kernel. we make use of 415 * the recursive PTE mappings. 416 */ 417 418 /* 419 * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking 420 * 421 * => no need to lock anything, assume va is already allocated 422 * => should be faster than normal pmap enter function 423 */ 424 425 void 426 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot) 427 { 428 pt_entry_t *pte, opte, npte; 429 430 pte = kvtopte(va); 431 432 npte = (pa & PMAP_PA_MASK) | ((prot & VM_PROT_WRITE) ? PG_RW : PG_RO) | 433 ((pa & PMAP_NOCACHE) ? PG_N : 0) | 434 ((pa & PMAP_WC) ? pmap_pg_wc : 0) | PG_V; 435 436 /* special 1:1 mappings in the first 2MB must not be global */ 437 if (va >= (vaddr_t)NBPD_L2) 438 npte |= PG_G; 439 440 if ((cpu_feature & CPUID_NXE) && !(prot & VM_PROT_EXECUTE)) 441 npte |= PG_NX; 442 opte = pmap_pte_set(pte, npte); 443 #ifdef LARGEPAGES 444 /* XXX For now... 
*/ 445 if (opte & PG_PS) 446 panic("pmap_kenter_pa: PG_PS"); 447 #endif 448 if (pmap_valid_entry(opte)) { 449 if (pa & PMAP_NOCACHE && (opte & PG_N) == 0) 450 wbinvd(); 451 /* This shouldn't happen */ 452 pmap_tlb_shootpage(pmap_kernel(), va); 453 pmap_tlb_shootwait(); 454 } 455 } 456 457 /* 458 * pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking 459 * 460 * => no need to lock anything 461 * => caller must dispose of any vm_page mapped in the va range 462 * => note: not an inline function 463 * => we assume the va is page aligned and the len is a multiple of PAGE_SIZE 464 * => we assume kernel only unmaps valid addresses and thus don't bother 465 * checking the valid bit before doing TLB flushing 466 */ 467 468 void 469 pmap_kremove(vaddr_t sva, vsize_t len) 470 { 471 pt_entry_t *pte, opte; 472 vaddr_t va, eva; 473 474 eva = sva + len; 475 476 for (va = sva; va != eva; va += PAGE_SIZE) { 477 pte = kvtopte(va); 478 479 opte = pmap_pte_set(pte, 0); 480 #ifdef LARGEPAGES 481 KASSERT((opte & PG_PS) == 0); 482 #endif 483 KASSERT((opte & PG_PVLIST) == 0); 484 } 485 486 pmap_tlb_shootrange(pmap_kernel(), sva, eva); 487 pmap_tlb_shootwait(); 488 } 489 490 /* 491 * p m a p i n i t f u n c t i o n s 492 * 493 * pmap_bootstrap and pmap_init are called during system startup 494 * to init the pmap module. pmap_bootstrap() does a low level 495 * init just to get things rolling. pmap_init() finishes the job. 496 */ 497 498 /* 499 * pmap_bootstrap: get the system in a state where it can run with VM 500 * properly enabled (called before main()). the VM system is 501 * fully init'd later... 502 * 503 * => on i386, locore.s has already enabled the MMU by allocating 504 * a PDP for the kernel, and nkpde PTP's for the kernel. 505 * => kva_start is the first free virtual address in kernel space 506 */ 507 508 paddr_t 509 pmap_bootstrap(paddr_t first_avail, paddr_t max_pa) 510 { 511 vaddr_t kva, kva_end, kva_start = VM_MIN_KERNEL_ADDRESS; 512 struct pmap *kpm; 513 int i; 514 unsigned long p1i; 515 pt_entry_t pg_nx = (cpu_feature & CPUID_NXE? PG_NX : 0); 516 long ndmpdp; 517 paddr_t dmpd, dmpdp; 518 519 /* 520 * define the boundaries of the managed kernel virtual address 521 * space. 522 */ 523 524 virtual_avail = kva_start; /* first free KVA */ 525 526 /* 527 * set up protection_codes: we need to be able to convert from 528 * a MI protection code (some combo of VM_PROT...) to something 529 * we can jam into a i386 PTE. 530 */ 531 532 protection_codes[VM_PROT_NONE] = pg_nx; /* --- */ 533 protection_codes[VM_PROT_EXECUTE] = PG_RO; /* --x */ 534 protection_codes[VM_PROT_READ] = PG_RO | pg_nx; /* -r- */ 535 protection_codes[VM_PROT_READ|VM_PROT_EXECUTE] = PG_RO; /* -rx */ 536 protection_codes[VM_PROT_WRITE] = PG_RW | pg_nx; /* w-- */ 537 protection_codes[VM_PROT_WRITE|VM_PROT_EXECUTE] = PG_RW;/* w-x */ 538 protection_codes[VM_PROT_WRITE|VM_PROT_READ] = PG_RW | pg_nx; 539 /* wr- */ 540 protection_codes[VM_PROT_ALL] = PG_RW; /* wrx */ 541 542 /* 543 * now we init the kernel's pmap 544 * 545 * the kernel pmap's pm_obj is not used for much. however, in 546 * user pmaps the pm_obj contains the list of active PTPs. 547 * the pm_obj currently does not have a pager. it might be possible 548 * to add a pager that would allow a process to read-only mmap its 549 * own page tables (fast user level vtophys?). this may or may not 550 * be useful. 
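	 *
	 * note that the kernel pmap is not allocated from pmap_pmap_pool:
	 * it lives in kernel_pmap_store and is wired up by hand below,
	 * reusing the PDP and PTPs that locore already built (proc0's
	 * pcb_cr3).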
551 */ 552 553 kpm = pmap_kernel(); 554 for (i = 0; i < PTP_LEVELS - 1; i++) { 555 uvm_objinit(&kpm->pm_obj[i], NULL, 1); 556 kpm->pm_ptphint[i] = NULL; 557 } 558 memset(&kpm->pm_list, 0, sizeof(kpm->pm_list)); /* pm_list not used */ 559 kpm->pm_pdir = (pd_entry_t *)(proc0.p_addr->u_pcb.pcb_cr3 + KERNBASE); 560 kpm->pm_pdirpa = proc0.p_addr->u_pcb.pcb_cr3; 561 kpm->pm_stats.wired_count = kpm->pm_stats.resident_count = 562 atop(kva_start - VM_MIN_KERNEL_ADDRESS); 563 564 /* 565 * the above is just a rough estimate and not critical to the proper 566 * operation of the system. 567 */ 568 569 curpcb->pcb_pmap = kpm; /* proc0's pcb */ 570 571 /* 572 * enable global TLB entries. 573 */ 574 /* add PG_G attribute to already mapped kernel pages */ 575 #if KERNBASE == VM_MIN_KERNEL_ADDRESS 576 for (kva = VM_MIN_KERNEL_ADDRESS ; kva < virtual_avail ; 577 #else 578 kva_end = roundup((vaddr_t)&end, PAGE_SIZE); 579 for (kva = KERNBASE; kva < kva_end ; 580 #endif 581 kva += PAGE_SIZE) { 582 p1i = pl1_i(kva); 583 if (pmap_valid_entry(PTE_BASE[p1i])) 584 PTE_BASE[p1i] |= PG_G; 585 } 586 587 /* 588 * Map the direct map. The first 4GB were mapped in locore, here 589 * we map the rest if it exists. We actually use the direct map 590 * here to set up the page tables, we're assuming that we're still 591 * operating in the lower 4GB of memory. 592 */ 593 ndmpdp = (max_pa + NBPD_L3 - 1) >> L3_SHIFT; 594 if (ndmpdp < NDML2_ENTRIES) 595 ndmpdp = NDML2_ENTRIES; /* At least 4GB */ 596 597 dmpdp = kpm->pm_pdir[PDIR_SLOT_DIRECT] & PG_FRAME; 598 599 dmpd = first_avail; first_avail += ndmpdp * PAGE_SIZE; 600 601 for (i = NDML2_ENTRIES; i < NPDPG * ndmpdp; i++) { 602 paddr_t pdp; 603 vaddr_t va; 604 605 pdp = (paddr_t)&(((pd_entry_t *)dmpd)[i]); 606 va = PMAP_DIRECT_MAP(pdp); 607 608 *((pd_entry_t *)va) = ((paddr_t)i << L2_SHIFT); 609 *((pd_entry_t *)va) |= PG_RW | PG_V | PG_PS | PG_G | PG_U | 610 PG_M; 611 } 612 613 for (i = NDML2_ENTRIES; i < ndmpdp; i++) { 614 paddr_t pdp; 615 vaddr_t va; 616 617 pdp = (paddr_t)&(((pd_entry_t *)dmpdp)[i]); 618 va = PMAP_DIRECT_MAP(pdp); 619 620 *((pd_entry_t *)va) = dmpd + (i << PAGE_SHIFT); 621 *((pd_entry_t *)va) |= PG_RW | PG_V | PG_U | PG_M; 622 } 623 624 kpm->pm_pdir[PDIR_SLOT_DIRECT] = dmpdp | PG_V | PG_KW | PG_U | 625 PG_M; 626 627 tlbflush(); 628 629 msgbuf_vaddr = virtual_avail; 630 virtual_avail += round_page(MSGBUFSIZE); 631 632 idt_vaddr = virtual_avail; 633 virtual_avail += 2 * PAGE_SIZE; 634 idt_paddr = first_avail; /* steal a page */ 635 first_avail += 2 * PAGE_SIZE; 636 637 #if defined(MULTIPROCESSOR) || \ 638 (NACPI > 0 && !defined(SMALL_KERNEL)) 639 /* 640 * Grab a page below 4G for things that need it (i.e. 641 * having an initial %cr3 for the MP trampoline). 642 */ 643 lo32_vaddr = virtual_avail; 644 virtual_avail += PAGE_SIZE; 645 lo32_paddr = first_avail; 646 first_avail += PAGE_SIZE; 647 #endif 648 649 /* 650 * init the global lists. 651 */ 652 LIST_INIT(&pmaps); 653 654 /* 655 * initialize the pmap pool. 656 */ 657 658 pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl", 659 &pool_allocator_nointr); 660 pool_init(&pmap_pv_pool, sizeof(struct pv_entry), 0, 0, 0, "pvpl", 661 &pool_allocator_nointr); 662 pool_sethiwat(&pmap_pv_pool, 32 * 1024); 663 664 /* 665 * initialize the PDE pool. 666 */ 667 668 pool_init(&pmap_pdp_pool, PAGE_SIZE, 0, 0, 0, "pdppl", 669 &pool_allocator_nointr); 670 671 /* 672 * ensure the TLB is sync'd with reality by flushing it... 
673 */ 674 675 tlbflush(); 676 677 return first_avail; 678 } 679 680 /* 681 * Pre-allocate PTPs for low memory, so that 1:1 mappings for various 682 * trampoline code can be entered. 683 */ 684 paddr_t 685 pmap_prealloc_lowmem_ptps(paddr_t first_avail) 686 { 687 pd_entry_t *pdes; 688 int level; 689 paddr_t newp; 690 691 pdes = pmap_kernel()->pm_pdir; 692 level = PTP_LEVELS; 693 for (;;) { 694 newp = first_avail; first_avail += PAGE_SIZE; 695 memset((void *)PMAP_DIRECT_MAP(newp), 0, PAGE_SIZE); 696 pdes[pl_i(0, level)] = (newp & PG_FRAME) | PG_V | PG_RW; 697 level--; 698 if (level <= 1) 699 break; 700 pdes = normal_pdes[level - 2]; 701 } 702 703 return first_avail; 704 } 705 706 /* 707 * pmap_init: called from uvm_init, our job is to get the pmap 708 * system ready to manage mappings... this mainly means initing 709 * the pv_entry stuff. 710 */ 711 712 void 713 pmap_init(void) 714 { 715 /* 716 * done: pmap module is up (and ready for business) 717 */ 718 719 pmap_initialized = TRUE; 720 } 721 722 /* 723 * p v _ e n t r y f u n c t i o n s 724 */ 725 726 /* 727 * main pv_entry manipulation functions: 728 * pmap_enter_pv: enter a mapping onto a pv list 729 * pmap_remove_pv: remove a mapping from a pv list 730 */ 731 732 /* 733 * pmap_enter_pv: enter a mapping onto a pv list 734 * 735 * => caller should adjust ptp's wire_count before calling 736 * 737 * pve: preallocated pve for us to use 738 * ptp: PTP in pmap that maps this VA 739 */ 740 741 void 742 pmap_enter_pv(struct vm_page *pg, struct pv_entry *pve, struct pmap *pmap, 743 vaddr_t va, struct vm_page *ptp) 744 { 745 pve->pv_pmap = pmap; 746 pve->pv_va = va; 747 pve->pv_ptp = ptp; /* NULL for kernel pmap */ 748 pve->pv_next = pg->mdpage.pv_list; /* add to ... */ 749 pg->mdpage.pv_list = pve; /* ... list */ 750 } 751 752 /* 753 * pmap_remove_pv: try to remove a mapping from a pv_list 754 * 755 * => caller should adjust ptp's wire_count and free PTP if needed 756 * => we return the removed pve 757 */ 758 759 struct pv_entry * 760 pmap_remove_pv(struct vm_page *pg, struct pmap *pmap, vaddr_t va) 761 { 762 struct pv_entry *pve, **prevptr; 763 764 prevptr = &pg->mdpage.pv_list; 765 while ((pve = *prevptr) != NULL) { 766 if (pve->pv_pmap == pmap && pve->pv_va == va) { /* match? */ 767 *prevptr = pve->pv_next; /* remove it! 
*/ 768 break; 769 } 770 prevptr = &pve->pv_next; /* previous pointer */ 771 } 772 return(pve); /* return removed pve */ 773 } 774 775 /* 776 * p t p f u n c t i o n s 777 */ 778 779 struct vm_page * 780 pmap_find_ptp(struct pmap *pmap, vaddr_t va, paddr_t pa, int level) 781 { 782 int lidx = level - 1; 783 struct vm_page *pg; 784 785 if (pa != (paddr_t)-1 && pmap->pm_ptphint[lidx] && 786 pa == VM_PAGE_TO_PHYS(pmap->pm_ptphint[lidx])) { 787 return (pmap->pm_ptphint[lidx]); 788 } 789 if (lidx == 0) 790 pg = uvm_pagelookup(&pmap->pm_obj[lidx], ptp_va2o(va, level)); 791 else { 792 pg = uvm_pagelookup(&pmap->pm_obj[lidx], ptp_va2o(va, level)); 793 } 794 return pg; 795 } 796 797 void 798 pmap_freepage(struct pmap *pmap, struct vm_page *ptp, int level, 799 struct pg_to_free *pagelist) 800 { 801 int lidx; 802 struct uvm_object *obj; 803 804 lidx = level - 1; 805 806 obj = &pmap->pm_obj[lidx]; 807 pmap->pm_stats.resident_count--; 808 if (pmap->pm_ptphint[lidx] == ptp) 809 pmap->pm_ptphint[lidx] = RB_ROOT(&obj->memt); 810 ptp->wire_count = 0; 811 uvm_pagerealloc(ptp, NULL, 0); 812 TAILQ_INSERT_TAIL(pagelist, ptp, pageq); 813 } 814 815 void 816 pmap_free_ptp(struct pmap *pmap, struct vm_page *ptp, vaddr_t va, 817 pt_entry_t *ptes, pd_entry_t **pdes, struct pg_to_free *pagelist) 818 { 819 unsigned long index; 820 int level; 821 vaddr_t invaladdr; 822 pd_entry_t opde; 823 824 level = 1; 825 do { 826 pmap_freepage(pmap, ptp, level, pagelist); 827 index = pl_i(va, level + 1); 828 opde = pmap_pte_set(&pdes[level - 1][index], 0); 829 invaladdr = level == 1 ? (vaddr_t)ptes : 830 (vaddr_t)pdes[level - 2]; 831 pmap_tlb_shootpage(curpcb->pcb_pmap, 832 invaladdr + index * PAGE_SIZE); 833 #if defined(MULTIPROCESSOR) 834 invaladdr = level == 1 ? (vaddr_t)PTE_BASE : 835 (vaddr_t)normal_pdes[level - 2]; 836 pmap_tlb_shootpage(pmap, invaladdr + index * PAGE_SIZE); 837 #endif 838 if (level < PTP_LEVELS - 1) { 839 ptp = pmap_find_ptp(pmap, va, (paddr_t)-1, level + 1); 840 ptp->wire_count--; 841 if (ptp->wire_count > 1) 842 break; 843 } 844 } while (++level < PTP_LEVELS); 845 } 846 847 /* 848 * pmap_get_ptp: get a PTP (if there isn't one, allocate a new one) 849 * 850 * => pmap should NOT be pmap_kernel() 851 */ 852 853 854 struct vm_page * 855 pmap_get_ptp(struct pmap *pmap, vaddr_t va, pd_entry_t **pdes) 856 { 857 struct vm_page *ptp, *pptp; 858 int i; 859 unsigned long index; 860 pd_entry_t *pva; 861 paddr_t ppa, pa; 862 struct uvm_object *obj; 863 864 ptp = NULL; 865 pa = (paddr_t)-1; 866 867 /* 868 * Loop through all page table levels seeing if we need to 869 * add a new page to that level. 870 */ 871 for (i = PTP_LEVELS; i > 1; i--) { 872 /* 873 * Save values from previous round. 874 */ 875 pptp = ptp; 876 ppa = pa; 877 878 index = pl_i(va, i); 879 pva = pdes[i - 2]; 880 881 if (pmap_valid_entry(pva[index])) { 882 ppa = pva[index] & PG_FRAME; 883 ptp = NULL; 884 continue; 885 } 886 887 obj = &pmap->pm_obj[i-2]; 888 ptp = uvm_pagealloc(obj, ptp_va2o(va, i - 1), NULL, 889 UVM_PGA_USERESERVE|UVM_PGA_ZERO); 890 891 if (ptp == NULL) 892 return NULL; 893 894 atomic_clearbits_int(&ptp->pg_flags, PG_BUSY); 895 ptp->wire_count = 1; 896 pmap->pm_ptphint[i - 2] = ptp; 897 pa = VM_PAGE_TO_PHYS(ptp); 898 pva[index] = (pd_entry_t) (pa | PG_u | PG_RW | PG_V); 899 pmap->pm_stats.resident_count++; 900 /* 901 * If we're not in the top level, increase the 902 * wire count of the parent page. 
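		 * (a PTP's wire_count is 1 for the PTP itself plus 1 for
		 * every valid entry it currently holds, so a PTP whose
		 * count falls back to 1 is empty and gets freed via
		 * pmap_free_ptp().)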
903 */ 904 if (i < PTP_LEVELS) { 905 if (pptp == NULL) 906 pptp = pmap_find_ptp(pmap, va, ppa, i); 907 #ifdef DIAGNOSTIC 908 if (pptp == NULL) 909 panic("pde page disappeared"); 910 #endif 911 pptp->wire_count++; 912 } 913 } 914 915 /* 916 * ptp is not NULL if we just allocated a new ptp. If it's 917 * still NULL, we must look up the existing one. 918 */ 919 if (ptp == NULL) { 920 ptp = pmap_find_ptp(pmap, va, ppa, 1); 921 #ifdef DIAGNOSTIC 922 if (ptp == NULL) { 923 printf("va %lx ppa %lx\n", (unsigned long)va, 924 (unsigned long)ppa); 925 panic("pmap_get_ptp: unmanaged user PTP"); 926 } 927 #endif 928 } 929 930 pmap->pm_ptphint[0] = ptp; 931 return(ptp); 932 } 933 934 /* 935 * p m a p l i f e c y c l e f u n c t i o n s 936 */ 937 938 /* 939 * pmap_pdp_ctor: constructor for the PDP cache. 940 */ 941 942 void 943 pmap_pdp_ctor(pd_entry_t *pdir) 944 { 945 paddr_t pdirpa; 946 int npde; 947 948 /* fetch the physical address of the page directory. */ 949 (void) pmap_extract(pmap_kernel(), (vaddr_t) pdir, &pdirpa); 950 951 /* zero init area */ 952 memset(pdir, 0, PDIR_SLOT_PTE * sizeof(pd_entry_t)); 953 954 /* put in recursive PDE to map the PTEs */ 955 pdir[PDIR_SLOT_PTE] = pdirpa | PG_V | PG_KW; 956 957 npde = nkptp[PTP_LEVELS - 1]; 958 959 /* put in kernel VM PDEs */ 960 memcpy(&pdir[PDIR_SLOT_KERN], &PDP_BASE[PDIR_SLOT_KERN], 961 npde * sizeof(pd_entry_t)); 962 963 /* zero the rest */ 964 memset(&pdir[PDIR_SLOT_KERN + npde], 0, 965 (NTOPLEVEL_PDES - (PDIR_SLOT_KERN + npde)) * sizeof(pd_entry_t)); 966 967 pdir[PDIR_SLOT_DIRECT] = pmap_kernel()->pm_pdir[PDIR_SLOT_DIRECT]; 968 969 #if VM_MIN_KERNEL_ADDRESS != KERNBASE 970 pdir[pl4_pi(KERNBASE)] = PDP_BASE[pl4_pi(KERNBASE)]; 971 #endif 972 } 973 974 /* 975 * pmap_create: create a pmap 976 * 977 * => note: old pmap interface took a "size" args which allowed for 978 * the creation of "software only" pmaps (not in bsd). 979 */ 980 981 struct pmap * 982 pmap_create(void) 983 { 984 struct pmap *pmap; 985 int i; 986 987 pmap = pool_get(&pmap_pmap_pool, PR_WAITOK); 988 989 /* init uvm_object */ 990 for (i = 0; i < PTP_LEVELS - 1; i++) { 991 uvm_objinit(&pmap->pm_obj[i], NULL, 1); 992 pmap->pm_ptphint[i] = NULL; 993 } 994 pmap->pm_stats.wired_count = 0; 995 pmap->pm_stats.resident_count = 1; /* count the PDP allocd below */ 996 pmap->pm_cpus = 0; 997 998 /* allocate PDP */ 999 1000 /* 1001 * note that there is no need to splvm to protect us from 1002 * malloc since malloc allocates out of a submap and we should 1003 * have already allocated kernel PTPs to cover the range... 1004 */ 1005 1006 pmap->pm_pdir = pool_get(&pmap_pdp_pool, PR_WAITOK); 1007 pmap_pdp_ctor(pmap->pm_pdir); 1008 1009 pmap->pm_pdirpa = pmap->pm_pdir[PDIR_SLOT_PTE] & PG_FRAME; 1010 1011 LIST_INSERT_HEAD(&pmaps, pmap, pm_list); 1012 return (pmap); 1013 } 1014 1015 /* 1016 * pmap_destroy: drop reference count on pmap. free pmap if 1017 * reference count goes to zero. 1018 */ 1019 1020 void 1021 pmap_destroy(struct pmap *pmap) 1022 { 1023 struct vm_page *pg; 1024 int refs; 1025 int i; 1026 1027 /* 1028 * drop reference count 1029 */ 1030 1031 refs = --pmap->pm_obj[0].uo_refs; 1032 if (refs > 0) { 1033 return; 1034 } 1035 1036 /* 1037 * reference count is zero, free pmap resources and then free pmap. 
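	 * (those resources are: any PTPs still hanging off pm_obj[],
	 * which go back to uvm, the PDP page, which goes back to
	 * pmap_pdp_pool, and finally the pmap structure itself, back
	 * to pmap_pmap_pool.)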
1038 */ 1039 1040 #ifdef DIAGNOSTIC 1041 if (pmap->pm_cpus != 0) 1042 printf("pmap_destroy: pmap %p cpus=0x%llx\n", 1043 (void *)pmap, pmap->pm_cpus); 1044 #endif 1045 1046 /* 1047 * remove it from global list of pmaps 1048 */ 1049 LIST_REMOVE(pmap, pm_list); 1050 1051 /* 1052 * free any remaining PTPs 1053 */ 1054 1055 for (i = 0; i < PTP_LEVELS - 1; i++) { 1056 while ((pg = RB_ROOT(&pmap->pm_obj[i].memt)) != NULL) { 1057 KASSERT((pg->pg_flags & PG_BUSY) == 0); 1058 1059 pg->wire_count = 0; 1060 uvm_pagefree(pg); 1061 } 1062 } 1063 1064 /* 1065 * MULTIPROCESSOR -- no need to flush out of other processors' 1066 * APTE space because we do that in pmap_unmap_ptes(). 1067 */ 1068 /* XXX: need to flush it out of other processor's APTE space? */ 1069 pool_put(&pmap_pdp_pool, pmap->pm_pdir); 1070 1071 pool_put(&pmap_pmap_pool, pmap); 1072 } 1073 1074 /* 1075 * Add a reference to the specified pmap. 1076 */ 1077 1078 void 1079 pmap_reference(struct pmap *pmap) 1080 { 1081 pmap->pm_obj[0].uo_refs++; 1082 } 1083 1084 /* 1085 * pmap_activate: activate a process' pmap (fill in %cr3) 1086 * 1087 * => called from cpu_fork() and when switching pmaps during exec 1088 * => if p is the curproc, then load it into the MMU 1089 */ 1090 1091 void 1092 pmap_activate(struct proc *p) 1093 { 1094 struct pcb *pcb = &p->p_addr->u_pcb; 1095 struct pmap *pmap = p->p_vmspace->vm_map.pmap; 1096 1097 pcb->pcb_pmap = pmap; 1098 pcb->pcb_cr3 = pmap->pm_pdirpa; 1099 if (p == curproc) { 1100 lcr3(pcb->pcb_cr3); 1101 1102 /* 1103 * mark the pmap in use by this processor. 1104 */ 1105 x86_atomic_setbits_u64(&pmap->pm_cpus, (1ULL << cpu_number())); 1106 } 1107 } 1108 1109 /* 1110 * pmap_deactivate: deactivate a process' pmap 1111 */ 1112 1113 void 1114 pmap_deactivate(struct proc *p) 1115 { 1116 struct pmap *pmap = p->p_vmspace->vm_map.pmap; 1117 1118 /* 1119 * mark the pmap no longer in use by this processor. 1120 */ 1121 x86_atomic_clearbits_u64(&pmap->pm_cpus, (1ULL << cpu_number())); 1122 } 1123 1124 /* 1125 * end of lifecycle functions 1126 */ 1127 1128 /* 1129 * some misc. 
functions
 */

boolean_t
pmap_pdes_valid(vaddr_t va, pd_entry_t **pdes, pd_entry_t *lastpde)
{
	int i;
	unsigned long index;
	pd_entry_t pde;

	for (i = PTP_LEVELS; i > 1; i--) {
		index = pl_i(va, i);
		pde = pdes[i - 2][index];
		if ((pde & PG_V) == 0)
			return FALSE;
	}
	if (lastpde != NULL)
		*lastpde = pde;
	return TRUE;
}

/*
 * pmap_extract: extract a PA for the given VA
 */

boolean_t
pmap_extract(struct pmap *pmap, vaddr_t va, paddr_t *pap)
{
	pt_entry_t *ptes, pte;
	pd_entry_t pde, **pdes;

	if (pmap == pmap_kernel() && va >= PMAP_DIRECT_BASE &&
	    va < PMAP_DIRECT_END) {
		*pap = va - PMAP_DIRECT_BASE;
		return (TRUE);
	}

	pmap_map_ptes(pmap, &ptes, &pdes);
	if (pmap_pdes_valid(va, pdes, &pde) == FALSE) {
		/* don't leave the alternate PTE space mapped on failure */
		pmap_unmap_ptes(pmap);
		return FALSE;
	}

	if (pde & PG_PS) {
		if (pap != NULL)
			*pap = (pde & PG_LGFRAME) | (va & 0x1fffff);
		pmap_unmap_ptes(pmap);
		return (TRUE);
	}

	pte = ptes[pl1_i(va)];
	pmap_unmap_ptes(pmap);

	if (__predict_true((pte & PG_V) != 0)) {
		if (pap != NULL)
			*pap = (pte & PG_FRAME) | (va & 0xfff);
		return (TRUE);
	}

	return FALSE;
}

/*
 * pmap_zero_page: zero a page
 */

void
pmap_zero_page(struct vm_page *pg)
{
	pagezero(pmap_map_direct(pg));
}

/*
 * pmap_flush_cache: flush the cache for a virtual address.
 */
void
pmap_flush_cache(vaddr_t addr, vsize_t len)
{
	vaddr_t i;

	if (curcpu()->ci_cflushsz == 0) {
		wbinvd();
		return;
	}

	/* all cpus that have clflush also have mfence. */
	mfence();
	for (i = addr; i < addr + len; i += curcpu()->ci_cflushsz)
		clflush(i);
	mfence();
}

/*
 * pmap_pageidlezero: the same as pmap_zero_page, but for the idle loop
 * page zero'er.  Returns TRUE if the page was zero'd, FALSE if we
 * aborted for some reason.
 */

boolean_t
pmap_pageidlezero(struct vm_page *pg)
{
	vaddr_t va = pmap_map_direct(pg);
	boolean_t rv = TRUE;
	long *ptr;
	int i;

	/*
	 * XXX - We'd really like to do this uncached. But at this moment
	 *	 we're never called, so just pretend that this works.
	 *	 It shouldn't be too hard to create a second direct map
	 *	 with uncached mappings.
	 */
	for (i = 0, ptr = (long *) va; i < PAGE_SIZE / sizeof(long); i++) {
		if (!curcpu_is_idle()) {

			/*
			 * A process has become ready.  Abort now,
			 * so we don't keep it waiting while we
			 * do slow memory access to finish this
			 * page.
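			 * Returning FALSE tells the caller we gave up
			 * before the whole page was zeroed.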
1248 */ 1249 1250 rv = FALSE; 1251 break; 1252 } 1253 *ptr++ = 0; 1254 } 1255 1256 return (rv); 1257 } 1258 1259 /* 1260 * pmap_copy_page: copy a page 1261 */ 1262 1263 void 1264 pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg) 1265 { 1266 vaddr_t srcva = pmap_map_direct(srcpg); 1267 vaddr_t dstva = pmap_map_direct(dstpg); 1268 1269 memcpy((void *)dstva, (void *)srcva, PAGE_SIZE); 1270 } 1271 1272 /* 1273 * p m a p r e m o v e f u n c t i o n s 1274 * 1275 * functions that remove mappings 1276 */ 1277 1278 /* 1279 * pmap_remove_ptes: remove PTEs from a PTP 1280 * 1281 * => must have proper locking on pmap_master_lock 1282 * => PTP must be mapped into KVA 1283 * => PTP should be null if pmap == pmap_kernel() 1284 */ 1285 1286 void 1287 pmap_remove_ptes(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva, 1288 vaddr_t startva, vaddr_t endva, int flags) 1289 { 1290 struct pv_entry *pve; 1291 pt_entry_t *pte = (pt_entry_t *) ptpva; 1292 struct vm_page *pg; 1293 pt_entry_t opte; 1294 1295 /* 1296 * note that ptpva points to the PTE that maps startva. this may 1297 * or may not be the first PTE in the PTP. 1298 * 1299 * we loop through the PTP while there are still PTEs to look at 1300 * and the wire_count is greater than 1 (because we use the wire_count 1301 * to keep track of the number of real PTEs in the PTP). 1302 */ 1303 1304 for (/*null*/; startva < endva && (ptp == NULL || ptp->wire_count > 1) 1305 ; pte++, startva += PAGE_SIZE) { 1306 if (!pmap_valid_entry(*pte)) 1307 continue; /* VA not mapped */ 1308 if ((flags & PMAP_REMOVE_SKIPWIRED) && (*pte & PG_W)) { 1309 continue; 1310 } 1311 1312 /* atomically save the old PTE and zap! it */ 1313 opte = pmap_pte_set(pte, 0); 1314 1315 if (opte & PG_W) 1316 pmap->pm_stats.wired_count--; 1317 pmap->pm_stats.resident_count--; 1318 1319 if (ptp) 1320 ptp->wire_count--; /* dropping a PTE */ 1321 1322 pg = PHYS_TO_VM_PAGE(opte & PG_FRAME); 1323 1324 /* 1325 * if we are not on a pv list we are done. 1326 */ 1327 1328 if ((opte & PG_PVLIST) == 0) { 1329 #ifdef DIAGNOSTIC 1330 if (pg != NULL) 1331 panic("pmap_remove_ptes: managed page without " 1332 "PG_PVLIST for 0x%lx", startva); 1333 #endif 1334 continue; 1335 } 1336 1337 #ifdef DIAGNOSTIC 1338 if (pg == NULL) 1339 panic("pmap_remove_ptes: unmanaged page marked " 1340 "PG_PVLIST, va = 0x%lx, pa = 0x%lx", 1341 startva, (u_long)(opte & PG_FRAME)); 1342 #endif 1343 1344 /* sync R/M bits */ 1345 pmap_sync_flags_pte(pg, opte); 1346 pve = pmap_remove_pv(pg, pmap, startva); 1347 1348 if (pve) { 1349 pool_put(&pmap_pv_pool, pve); 1350 } 1351 1352 /* end of "for" loop: time for next pte */ 1353 } 1354 } 1355 1356 1357 /* 1358 * pmap_remove_pte: remove a single PTE from a PTP 1359 * 1360 * => must have proper locking on pmap_master_lock 1361 * => PTP must be mapped into KVA 1362 * => PTP should be null if pmap == pmap_kernel() 1363 * => returns true if we removed a mapping 1364 */ 1365 1366 boolean_t 1367 pmap_remove_pte(struct pmap *pmap, struct vm_page *ptp, pt_entry_t *pte, 1368 vaddr_t va, int flags) 1369 { 1370 struct pv_entry *pve; 1371 struct vm_page *pg; 1372 pt_entry_t opte; 1373 1374 if (!pmap_valid_entry(*pte)) 1375 return(FALSE); /* VA not mapped */ 1376 if ((flags & PMAP_REMOVE_SKIPWIRED) && (*pte & PG_W)) { 1377 return(FALSE); 1378 } 1379 1380 /* atomically save the old PTE and zap! 
it */ 1381 opte = pmap_pte_set(pte, 0); 1382 1383 if (opte & PG_W) 1384 pmap->pm_stats.wired_count--; 1385 pmap->pm_stats.resident_count--; 1386 1387 if (ptp) 1388 ptp->wire_count--; /* dropping a PTE */ 1389 1390 pg = PHYS_TO_VM_PAGE(opte & PG_FRAME); 1391 1392 /* 1393 * if we are not on a pv list we are done. 1394 */ 1395 if ((opte & PG_PVLIST) == 0) { 1396 #ifdef DIAGNOSTIC 1397 if (pg != NULL) 1398 panic("pmap_remove_pte: managed page without " 1399 "PG_PVLIST for 0x%lx", va); 1400 #endif 1401 return(TRUE); 1402 } 1403 1404 #ifdef DIAGNOSTIC 1405 if (pg == NULL) 1406 panic("pmap_remove_pte: unmanaged page marked " 1407 "PG_PVLIST, va = 0x%lx, pa = 0x%lx", va, 1408 (u_long)(opte & PG_FRAME)); 1409 #endif 1410 1411 /* sync R/M bits */ 1412 pmap_sync_flags_pte(pg, opte); 1413 pve = pmap_remove_pv(pg, pmap, va); 1414 if (pve) 1415 pool_put(&pmap_pv_pool, pve); 1416 return(TRUE); 1417 } 1418 1419 /* 1420 * pmap_remove: top level mapping removal function 1421 * 1422 * => caller should not be holding any pmap locks 1423 */ 1424 1425 void 1426 pmap_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva) 1427 { 1428 pmap_do_remove(pmap, sva, eva, PMAP_REMOVE_ALL); 1429 } 1430 1431 /* 1432 * pmap_do_remove: mapping removal guts 1433 * 1434 * => caller should not be holding any pmap locks 1435 */ 1436 1437 void 1438 pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags) 1439 { 1440 pt_entry_t *ptes; 1441 pd_entry_t **pdes, pde; 1442 boolean_t result; 1443 paddr_t ptppa; 1444 vaddr_t blkendva; 1445 struct vm_page *ptp; 1446 vaddr_t va; 1447 int shootall = 0; 1448 struct pg_to_free empty_ptps; 1449 1450 TAILQ_INIT(&empty_ptps); 1451 1452 PMAP_MAP_TO_HEAD_LOCK(); 1453 pmap_map_ptes(pmap, &ptes, &pdes); 1454 1455 /* 1456 * removing one page? take shortcut function. 1457 */ 1458 1459 if (sva + PAGE_SIZE == eva) { 1460 if (pmap_pdes_valid(sva, pdes, &pde)) { 1461 1462 /* PA of the PTP */ 1463 ptppa = pde & PG_FRAME; 1464 1465 /* get PTP if non-kernel mapping */ 1466 1467 if (pmap == pmap_kernel()) { 1468 /* we never free kernel PTPs */ 1469 ptp = NULL; 1470 } else { 1471 ptp = pmap_find_ptp(pmap, sva, ptppa, 1); 1472 #ifdef DIAGNOSTIC 1473 if (ptp == NULL) 1474 panic("pmap_remove: unmanaged " 1475 "PTP detected"); 1476 #endif 1477 } 1478 1479 /* do it! */ 1480 result = pmap_remove_pte(pmap, ptp, 1481 &ptes[pl1_i(sva)], sva, flags); 1482 1483 /* 1484 * if mapping removed and the PTP is no longer 1485 * being used, free it! 1486 */ 1487 1488 if (result && ptp && ptp->wire_count <= 1) 1489 pmap_free_ptp(pmap, ptp, sva, ptes, pdes, 1490 &empty_ptps); 1491 pmap_tlb_shootpage(pmap, sva); 1492 } 1493 1494 pmap_tlb_shootwait(); 1495 pmap_unmap_ptes(pmap); 1496 PMAP_MAP_TO_HEAD_UNLOCK(); 1497 1498 while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) { 1499 TAILQ_REMOVE(&empty_ptps, ptp, pageq); 1500 uvm_pagefree(ptp); 1501 } 1502 1503 return; 1504 } 1505 1506 if ((eva - sva > 32 * PAGE_SIZE) && pmap != pmap_kernel()) 1507 shootall = 1; 1508 1509 for (va = sva; va < eva; va = blkendva) { 1510 /* determine range of block */ 1511 blkendva = x86_round_pdr(va + 1); 1512 if (blkendva > eva) 1513 blkendva = eva; 1514 1515 /* 1516 * XXXCDC: our PTE mappings should never be removed 1517 * with pmap_remove! if we allow this (and why would 1518 * we?) then we end up freeing the pmap's page 1519 * directory page (PDP) before we are finished using 1520 * it when we hit in in the recursive mapping. this 1521 * is BAD. 1522 * 1523 * long term solution is to move the PTEs out of user 1524 * address space. 
and into kernel address space (up 1525 * with APTE). then we can set VM_MAXUSER_ADDRESS to 1526 * be VM_MAX_ADDRESS. 1527 */ 1528 1529 if (pl_i(va, PTP_LEVELS) == PDIR_SLOT_PTE) 1530 /* XXXCDC: ugly hack to avoid freeing PDP here */ 1531 continue; 1532 1533 if (!pmap_pdes_valid(va, pdes, &pde)) 1534 continue; 1535 1536 /* PA of the PTP */ 1537 ptppa = pde & PG_FRAME; 1538 1539 /* get PTP if non-kernel mapping */ 1540 if (pmap == pmap_kernel()) { 1541 /* we never free kernel PTPs */ 1542 ptp = NULL; 1543 } else { 1544 ptp = pmap_find_ptp(pmap, va, ptppa, 1); 1545 #ifdef DIAGNOSTIC 1546 if (ptp == NULL) 1547 panic("pmap_remove: unmanaged PTP " 1548 "detected"); 1549 #endif 1550 } 1551 pmap_remove_ptes(pmap, ptp, 1552 (vaddr_t)&ptes[pl1_i(va)], va, blkendva, flags); 1553 1554 /* if PTP is no longer being used, free it! */ 1555 if (ptp && ptp->wire_count <= 1) { 1556 pmap_free_ptp(pmap, ptp, va, ptes, pdes, &empty_ptps); 1557 } 1558 } 1559 1560 if (shootall) 1561 pmap_tlb_shoottlb(); 1562 else 1563 pmap_tlb_shootrange(pmap, sva, eva); 1564 1565 pmap_tlb_shootwait(); 1566 1567 pmap_unmap_ptes(pmap); 1568 PMAP_MAP_TO_HEAD_UNLOCK(); 1569 1570 while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) { 1571 TAILQ_REMOVE(&empty_ptps, ptp, pageq); 1572 uvm_pagefree(ptp); 1573 } 1574 } 1575 1576 /* 1577 * pmap_page_remove: remove a managed vm_page from all pmaps that map it 1578 * 1579 * => R/M bits are sync'd back to attrs 1580 */ 1581 1582 void 1583 pmap_page_remove(struct vm_page *pg) 1584 { 1585 struct pv_entry *pve; 1586 pt_entry_t *ptes, opte; 1587 pd_entry_t **pdes; 1588 #ifdef DIAGNOSTIC 1589 pd_entry_t pde; 1590 #endif 1591 struct pg_to_free empty_ptps; 1592 struct vm_page *ptp; 1593 1594 TAILQ_INIT(&empty_ptps); 1595 1596 PMAP_HEAD_TO_MAP_LOCK(); 1597 1598 while ((pve = pg->mdpage.pv_list) != NULL) { 1599 pg->mdpage.pv_list = pve->pv_next; 1600 1601 pmap_map_ptes(pve->pv_pmap, &ptes, &pdes); 1602 1603 #ifdef DIAGNOSTIC 1604 if (pve->pv_ptp && pmap_pdes_valid(pve->pv_va, pdes, &pde) && 1605 (pde & PG_FRAME) != VM_PAGE_TO_PHYS(pve->pv_ptp)) { 1606 printf("pmap_page_remove: pg=%p: va=%lx, pv_ptp=%p\n", 1607 pg, pve->pv_va, pve->pv_ptp); 1608 printf("pmap_page_remove: PTP's phys addr: " 1609 "actual=%lx, recorded=%lx\n", 1610 (unsigned long)(pde & PG_FRAME), 1611 VM_PAGE_TO_PHYS(pve->pv_ptp)); 1612 panic("pmap_page_remove: mapped managed page has " 1613 "invalid pv_ptp field"); 1614 } 1615 #endif 1616 1617 /* atomically save the old PTE and zap it */ 1618 opte = pmap_pte_set(&ptes[pl1_i(pve->pv_va)], 0); 1619 1620 if (opte & PG_W) 1621 pve->pv_pmap->pm_stats.wired_count--; 1622 pve->pv_pmap->pm_stats.resident_count--; 1623 1624 pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va); 1625 1626 pmap_sync_flags_pte(pg, opte); 1627 1628 /* update the PTP reference count. free if last reference. */ 1629 if (pve->pv_ptp) { 1630 pve->pv_ptp->wire_count--; 1631 if (pve->pv_ptp->wire_count <= 1) { 1632 pmap_free_ptp(pve->pv_pmap, pve->pv_ptp, 1633 pve->pv_va, ptes, pdes, &empty_ptps); 1634 } 1635 } 1636 pmap_unmap_ptes(pve->pv_pmap); 1637 pool_put(&pmap_pv_pool, pve); 1638 } 1639 1640 PMAP_HEAD_TO_MAP_UNLOCK(); 1641 pmap_tlb_shootwait(); 1642 1643 while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) { 1644 TAILQ_REMOVE(&empty_ptps, ptp, pageq); 1645 uvm_pagefree(ptp); 1646 } 1647 } 1648 1649 /* 1650 * p m a p a t t r i b u t e f u n c t i o n s 1651 * functions that test/change managed page's attributes 1652 * since a page can be mapped multiple times we must check each PTE that 1653 * maps it by going down the pv lists. 
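 *
 * the hardware referenced/modified bits (PG_U/PG_M) seen in those PTEs
 * are cached in pg->pg_flags as PG_PMAP_REF/PG_PMAP_MOD (see
 * pmap_pte2flags() and pmap_sync_flags_pte() above), so the pv list only
 * has to be walked when the cached flags can't answer the question,
 * as in pmap_test_attrs() below.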
1654 */ 1655 1656 /* 1657 * pmap_test_attrs: test a page's attributes 1658 */ 1659 1660 boolean_t 1661 pmap_test_attrs(struct vm_page *pg, unsigned int testbits) 1662 { 1663 struct pv_entry *pve; 1664 pt_entry_t *ptes, pte; 1665 pd_entry_t **pdes; 1666 u_long mybits, testflags; 1667 1668 testflags = pmap_pte2flags(testbits); 1669 1670 if (pg->pg_flags & testflags) 1671 return (TRUE); 1672 1673 PMAP_HEAD_TO_MAP_LOCK(); 1674 mybits = 0; 1675 for (pve = pg->mdpage.pv_list; pve != NULL && mybits == 0; 1676 pve = pve->pv_next) { 1677 pmap_map_ptes(pve->pv_pmap, &ptes, &pdes); 1678 pte = ptes[pl1_i(pve->pv_va)]; 1679 pmap_unmap_ptes(pve->pv_pmap); 1680 mybits |= (pte & testbits); 1681 } 1682 PMAP_HEAD_TO_MAP_UNLOCK(); 1683 1684 if (mybits == 0) 1685 return (FALSE); 1686 1687 atomic_setbits_int(&pg->pg_flags, pmap_pte2flags(mybits)); 1688 1689 return (TRUE); 1690 } 1691 1692 /* 1693 * pmap_clear_attrs: change a page's attributes 1694 * 1695 * => we return TRUE if we cleared one of the bits we were asked to 1696 */ 1697 1698 boolean_t 1699 pmap_clear_attrs(struct vm_page *pg, unsigned long clearbits) 1700 { 1701 struct pv_entry *pve; 1702 pt_entry_t *ptes, opte; 1703 pd_entry_t **pdes; 1704 u_long clearflags; 1705 int result; 1706 1707 clearflags = pmap_pte2flags(clearbits); 1708 1709 PMAP_HEAD_TO_MAP_LOCK(); 1710 1711 result = pg->pg_flags & clearflags; 1712 if (result) 1713 atomic_clearbits_int(&pg->pg_flags, clearflags); 1714 1715 for (pve = pg->mdpage.pv_list; pve != NULL; pve = pve->pv_next) { 1716 pmap_map_ptes(pve->pv_pmap, &ptes, &pdes); 1717 #ifdef DIAGNOSTIC 1718 if (!pmap_pdes_valid(pve->pv_va, pdes, NULL)) 1719 panic("pmap_change_attrs: mapping without PTP " 1720 "detected"); 1721 #endif 1722 1723 opte = ptes[pl1_i(pve->pv_va)]; 1724 if (opte & clearbits) { 1725 result = 1; 1726 pmap_pte_clearbits(&ptes[pl1_i(pve->pv_va)], 1727 (opte & clearbits)); 1728 pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va); 1729 } 1730 pmap_unmap_ptes(pve->pv_pmap); 1731 } 1732 1733 PMAP_HEAD_TO_MAP_UNLOCK(); 1734 1735 pmap_tlb_shootwait(); 1736 1737 return (result != 0); 1738 } 1739 1740 /* 1741 * p m a p p r o t e c t i o n f u n c t i o n s 1742 */ 1743 1744 /* 1745 * pmap_page_protect: change the protection of all recorded mappings 1746 * of a managed page 1747 * 1748 * => NOTE: this is an inline function in pmap.h 1749 */ 1750 1751 /* see pmap.h */ 1752 1753 /* 1754 * pmap_protect: set the protection in of the pages in a pmap 1755 * 1756 * => NOTE: this is an inline function in pmap.h 1757 */ 1758 1759 /* see pmap.h */ 1760 1761 /* 1762 * pmap_write_protect: write-protect pages in a pmap 1763 */ 1764 1765 void 1766 pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot) 1767 { 1768 pt_entry_t nx, *ptes, *spte, *epte; 1769 pd_entry_t **pdes; 1770 vaddr_t blockend; 1771 int shootall = 0; 1772 vaddr_t va; 1773 1774 pmap_map_ptes(pmap, &ptes, &pdes); 1775 1776 /* should be ok, but just in case ... */ 1777 sva &= PG_FRAME; 1778 eva &= PG_FRAME; 1779 1780 nx = 0; 1781 if ((cpu_feature & CPUID_NXE) && !(prot & VM_PROT_EXECUTE)) 1782 nx = PG_NX; 1783 1784 if ((eva - sva > 32 * PAGE_SIZE) && pmap != pmap_kernel()) 1785 shootall = 1; 1786 1787 for (va = sva; va < eva ; va = blockend) { 1788 blockend = (va & L2_FRAME) + NBPD_L2; 1789 if (blockend > eva) 1790 blockend = eva; 1791 1792 /* 1793 * XXXCDC: our PTE mappings should never be write-protected! 1794 * 1795 * long term solution is to move the PTEs out of user 1796 * address space. and into kernel address space (up 1797 * with APTE). 
then we can set VM_MAXUSER_ADDRESS to 1798 * be VM_MAX_ADDRESS. 1799 */ 1800 1801 /* XXXCDC: ugly hack to avoid freeing PDP here */ 1802 if (pl_i(va, PTP_LEVELS) == PDIR_SLOT_PTE) 1803 continue; 1804 1805 /* empty block? */ 1806 if (!pmap_pdes_valid(va, pdes, NULL)) 1807 continue; 1808 1809 #ifdef DIAGNOSTIC 1810 if (va >= VM_MAXUSER_ADDRESS && va < VM_MAX_ADDRESS) 1811 panic("pmap_write_protect: PTE space"); 1812 #endif 1813 1814 spte = &ptes[pl1_i(va)]; 1815 epte = &ptes[pl1_i(blockend)]; 1816 1817 for (/*null */; spte < epte ; spte++) { 1818 if (!(*spte & PG_V)) 1819 continue; 1820 pmap_pte_clearbits(spte, PG_RW); 1821 pmap_pte_setbits(spte, nx); 1822 } 1823 } 1824 1825 if (shootall) 1826 pmap_tlb_shoottlb(); 1827 else 1828 pmap_tlb_shootrange(pmap, sva, eva); 1829 1830 pmap_tlb_shootwait(); 1831 1832 pmap_unmap_ptes(pmap); 1833 } 1834 1835 /* 1836 * end of protection functions 1837 */ 1838 1839 /* 1840 * pmap_unwire: clear the wired bit in the PTE 1841 * 1842 * => mapping should already be in map 1843 */ 1844 1845 void 1846 pmap_unwire(struct pmap *pmap, vaddr_t va) 1847 { 1848 pt_entry_t *ptes; 1849 pd_entry_t **pdes; 1850 1851 pmap_map_ptes(pmap, &ptes, &pdes); 1852 1853 if (pmap_pdes_valid(va, pdes, NULL)) { 1854 1855 #ifdef DIAGNOSTIC 1856 if (!pmap_valid_entry(ptes[pl1_i(va)])) 1857 panic("pmap_unwire: invalid (unmapped) va 0x%lx", va); 1858 #endif 1859 if ((ptes[pl1_i(va)] & PG_W) != 0) { 1860 pmap_pte_clearbits(&ptes[pl1_i(va)], PG_W); 1861 pmap->pm_stats.wired_count--; 1862 } 1863 #ifdef DIAGNOSTIC 1864 else { 1865 printf("pmap_unwire: wiring for pmap %p va 0x%lx " 1866 "didn't change!\n", pmap, va); 1867 } 1868 #endif 1869 pmap_unmap_ptes(pmap); 1870 } 1871 #ifdef DIAGNOSTIC 1872 else { 1873 panic("pmap_unwire: invalid PDE"); 1874 } 1875 #endif 1876 } 1877 1878 /* 1879 * pmap_collect: free resources held by a pmap 1880 * 1881 * => optional function. 1882 * => called when a process is swapped out to free memory. 1883 */ 1884 1885 void 1886 pmap_collect(struct pmap *pmap) 1887 { 1888 /* 1889 * free all of the pt pages by removing the physical mappings 1890 * for its entire address space. 1891 */ 1892 1893 /* pmap_do_remove(pmap, VM_MIN_ADDRESS, VM_MAX_ADDRESS, 1894 PMAP_REMOVE_SKIPWIRED); 1895 */ 1896 } 1897 1898 /* 1899 * pmap_copy: copy mappings from one pmap to another 1900 * 1901 * => optional function 1902 * void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) 1903 */ 1904 1905 /* 1906 * defined as macro in pmap.h 1907 */ 1908 1909 /* 1910 * pmap_enter: enter a mapping into a pmap 1911 * 1912 * => must be done "now" ... 
no lazy-evaluation 1913 */ 1914 1915 int 1916 pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags) 1917 { 1918 pt_entry_t *ptes, opte, npte; 1919 pd_entry_t **pdes; 1920 struct vm_page *ptp, *pg = NULL; 1921 struct pv_entry *pve = NULL; 1922 int ptpdelta, wireddelta, resdelta; 1923 boolean_t wired = (flags & PMAP_WIRED) != 0; 1924 boolean_t nocache = (pa & PMAP_NOCACHE) != 0; 1925 boolean_t wc = (pa & PMAP_WC) != 0; 1926 int error; 1927 1928 KASSERT(!(wc && nocache)); 1929 pa &= PMAP_PA_MASK; 1930 1931 #ifdef DIAGNOSTIC 1932 if (va == (vaddr_t) PDP_BASE || va == (vaddr_t) APDP_BASE) 1933 panic("pmap_enter: trying to map over PDP/APDP!"); 1934 1935 /* sanity check: kernel PTPs should already have been pre-allocated */ 1936 if (va >= VM_MIN_KERNEL_ADDRESS && 1937 !pmap_valid_entry(pmap->pm_pdir[pl_i(va, PTP_LEVELS)])) 1938 panic("pmap_enter: missing kernel PTP for va %lx!", va); 1939 1940 #endif 1941 1942 /* get lock */ 1943 PMAP_MAP_TO_HEAD_LOCK(); 1944 1945 /* 1946 * map in ptes and get a pointer to our PTP (unless we are the kernel) 1947 */ 1948 1949 pmap_map_ptes(pmap, &ptes, &pdes); 1950 if (pmap == pmap_kernel()) { 1951 ptp = NULL; 1952 } else { 1953 ptp = pmap_get_ptp(pmap, va, pdes); 1954 if (ptp == NULL) { 1955 if (flags & PMAP_CANFAIL) { 1956 error = ENOMEM; 1957 goto out; 1958 } 1959 panic("pmap_enter: get ptp failed"); 1960 } 1961 } 1962 opte = ptes[pl1_i(va)]; /* old PTE */ 1963 1964 /* 1965 * is there currently a valid mapping at our VA? 1966 */ 1967 1968 if (pmap_valid_entry(opte)) { 1969 /* 1970 * first, calculate pm_stats updates. resident count will not 1971 * change since we are replacing/changing a valid mapping. 1972 * wired count might change... 1973 */ 1974 1975 resdelta = 0; 1976 if (wired && (opte & PG_W) == 0) 1977 wireddelta = 1; 1978 else if (!wired && (opte & PG_W) != 0) 1979 wireddelta = -1; 1980 else 1981 wireddelta = 0; 1982 ptpdelta = 0; 1983 1984 /* 1985 * is the currently mapped PA the same as the one we 1986 * want to map? 1987 */ 1988 1989 if ((opte & PG_FRAME) == pa) { 1990 1991 /* if this is on the PVLIST, sync R/M bit */ 1992 if (opte & PG_PVLIST) { 1993 pg = PHYS_TO_VM_PAGE(pa); 1994 #ifdef DIAGNOSTIC 1995 if (pg == NULL) 1996 panic("pmap_enter: same pa PG_PVLIST " 1997 "mapping with unmanaged page " 1998 "pa = 0x%lx (0x%lx)", pa, 1999 atop(pa)); 2000 #endif 2001 pmap_sync_flags_pte(pg, opte); 2002 } else { 2003 #ifdef DIAGNOSTIC 2004 if (PHYS_TO_VM_PAGE(pa) != NULL) 2005 panic("pmap_enter: same pa, managed " 2006 "page, no PG_VLIST pa: 0x%lx\n", 2007 pa); 2008 #endif 2009 } 2010 goto enter_now; 2011 } 2012 2013 /* 2014 * changing PAs: we must remove the old one first 2015 */ 2016 2017 /* 2018 * if current mapping is on a pvlist, 2019 * remove it (sync R/M bits) 2020 */ 2021 2022 if (opte & PG_PVLIST) { 2023 pg = PHYS_TO_VM_PAGE(opte & PG_FRAME); 2024 #ifdef DIAGNOSTIC 2025 if (pg == NULL) 2026 panic("pmap_enter: PG_PVLIST mapping with " 2027 "unmanaged page " 2028 "pa = 0x%lx (0x%lx)", pa, atop(pa)); 2029 #endif 2030 pmap_sync_flags_pte(pg, opte); 2031 pve = pmap_remove_pv(pg, pmap, va); 2032 pg = NULL; /* This is not the page we are looking for */ 2033 } 2034 } else { /* opte not valid */ 2035 pve = NULL; 2036 resdelta = 1; 2037 if (wired) 2038 wireddelta = 1; 2039 else 2040 wireddelta = 0; 2041 if (ptp) 2042 ptpdelta = 1; 2043 else 2044 ptpdelta = 0; 2045 } 2046 2047 /* 2048 * pve is either NULL or points to a now-free pv_entry structure 2049 * (the latter case is if we called pmap_remove_pv above). 
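	 * reusing that pve below avoids a trip to pmap_pv_pool when an
	 * existing valid mapping is simply being replaced.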
2050 * 2051 * if this entry is to be on a pvlist, enter it now. 2052 */ 2053 2054 if (pmap_initialized) 2055 pg = PHYS_TO_VM_PAGE(pa); 2056 2057 if (pg != NULL) { 2058 if (pve == NULL) { 2059 pve = pool_get(&pmap_pv_pool, PR_NOWAIT); 2060 if (pve == NULL) { 2061 if (flags & PMAP_CANFAIL) { 2062 error = ENOMEM; 2063 goto out; 2064 } 2065 panic("pmap_enter: no pv entries available"); 2066 } 2067 } 2068 pmap_enter_pv(pg, pve, pmap, va, ptp); 2069 } else { 2070 /* new mapping is not PG_PVLIST. free pve if we've got one */ 2071 if (pve) 2072 pool_put(&pmap_pv_pool, pve); 2073 } 2074 2075 enter_now: 2076 /* 2077 * at this point pg is !NULL if we want the PG_PVLIST bit set 2078 */ 2079 2080 pmap->pm_stats.resident_count += resdelta; 2081 pmap->pm_stats.wired_count += wireddelta; 2082 if (ptp) 2083 ptp->wire_count += ptpdelta; 2084 2085 if (pg != PHYS_TO_VM_PAGE(pa)) 2086 panic("wtf?"); 2087 2088 npte = pa | protection_codes[prot] | PG_V; 2089 if (pg != NULL) { 2090 npte |= PG_PVLIST; 2091 /* 2092 * make sure that if the page is write combined all 2093 * instances of pmap_enter make it so. 2094 */ 2095 if (pg->pg_flags & PG_PMAP_WC) { 2096 KASSERT(nocache == 0); 2097 wc = TRUE; 2098 } 2099 } 2100 if (wc) 2101 npte |= pmap_pg_wc; 2102 if (wired) 2103 npte |= PG_W; 2104 if (nocache) 2105 npte |= PG_N; 2106 if (va < VM_MAXUSER_ADDRESS) 2107 npte |= PG_u; 2108 else if (va < VM_MAX_ADDRESS) 2109 npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? */ 2110 if (pmap == pmap_kernel()) 2111 npte |= PG_G; 2112 2113 ptes[pl1_i(va)] = npte; /* zap! */ 2114 2115 /* 2116 * If we changed anything other than modified/used bits, 2117 * flush the TLB. (is this overkill?) 2118 */ 2119 if (opte & PG_V) { 2120 if (nocache && (opte & PG_N) == 0) 2121 wbinvd(); 2122 pmap_tlb_shootpage(pmap, va); 2123 pmap_tlb_shootwait(); 2124 } 2125 2126 error = 0; 2127 2128 out: 2129 pmap_unmap_ptes(pmap); 2130 PMAP_MAP_TO_HEAD_UNLOCK(); 2131 2132 return error; 2133 } 2134 2135 boolean_t 2136 pmap_get_physpage(vaddr_t va, int level, paddr_t *paddrp) 2137 { 2138 struct vm_page *ptp; 2139 struct pmap *kpm = pmap_kernel(); 2140 2141 if (uvm.page_init_done == FALSE) { 2142 vaddr_t va; 2143 2144 /* 2145 * we're growing the kernel pmap early (from 2146 * uvm_pageboot_alloc()). this case must be 2147 * handled a little differently. 2148 */ 2149 2150 va = pmap_steal_memory(PAGE_SIZE, NULL, NULL); 2151 *paddrp = PMAP_DIRECT_UNMAP(va); 2152 } else { 2153 ptp = uvm_pagealloc(&kpm->pm_obj[level - 1], 2154 ptp_va2o(va, level), NULL, 2155 UVM_PGA_USERESERVE|UVM_PGA_ZERO); 2156 if (ptp == NULL) 2157 panic("pmap_get_physpage: out of memory"); 2158 atomic_clearbits_int(&ptp->pg_flags, PG_BUSY); 2159 ptp->wire_count = 1; 2160 *paddrp = VM_PAGE_TO_PHYS(ptp); 2161 } 2162 kpm->pm_stats.resident_count++; 2163 return TRUE; 2164 } 2165 2166 /* 2167 * Allocate the amount of specified ptps for a ptp level, and populate 2168 * all levels below accordingly, mapping virtual addresses starting at 2169 * kva. 2170 * 2171 * Used by pmap_growkernel. 
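 *
 * (the needed_ptps array is indexed by level - 1 and says how many new
 * entries each level needs; the loop below walks the levels top-down,
 * grabbing a page for each new entry with pmap_get_physpage() and
 * hooking it into the level above.)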
/*
 * Allocate the specified number of PTPs for a page table level, and
 * populate all levels below accordingly, mapping virtual addresses
 * starting at kva.
 *
 * Used by pmap_growkernel.
 */
void
pmap_alloc_level(pd_entry_t **pdes, vaddr_t kva, int lvl, long *needed_ptps)
{
	unsigned long i;
	vaddr_t va;
	paddr_t pa;
	unsigned long index, endindex;
	int level;
	pd_entry_t *pdep;

	for (level = lvl; level > 1; level--) {
		if (level == PTP_LEVELS)
			pdep = pmap_kernel()->pm_pdir;
		else
			pdep = pdes[level - 2];
		va = kva;
		index = pl_i(kva, level);
		endindex = index + needed_ptps[level - 1];
		/*
		 * XXX special case for first time call.
		 */
		if (nkptp[level - 1] != 0)
			index++;
		else
			endindex--;

		for (i = index; i <= endindex; i++) {
			pmap_get_physpage(va, level - 1, &pa);
			pdep[i] = pa | PG_RW | PG_V;
			nkptp[level - 1]++;
			va += nbpd[level - 1];
		}
	}
}

/*
 * pmap_growkernel: increase usage of KVM space
 *
 * => we allocate new PTPs for the kernel and install them in all
 *    the pmaps on the system.
 */

static vaddr_t pmap_maxkvaddr = VM_MIN_KERNEL_ADDRESS;

vaddr_t
pmap_growkernel(vaddr_t maxkvaddr)
{
	struct pmap *kpm = pmap_kernel(), *pm;
	int s, i;
	unsigned newpdes;
	long needed_kptp[PTP_LEVELS], target_nptp, old;

	if (maxkvaddr <= pmap_maxkvaddr)
		return pmap_maxkvaddr;

	maxkvaddr = x86_round_pdr(maxkvaddr);
	old = nkptp[PTP_LEVELS - 1];
	/*
	 * This loop could be optimized more, but pmap_growkernel()
	 * is called infrequently.
	 */
	for (i = PTP_LEVELS - 1; i >= 1; i--) {
		target_nptp = pl_i(maxkvaddr, i + 1) -
		    pl_i(VM_MIN_KERNEL_ADDRESS, i + 1);
		/*
		 * XXX only need to check toplevel.
		 */
		if (target_nptp > nkptpmax[i])
			panic("out of KVA space");
		needed_kptp[i] = target_nptp - nkptp[i] + 1;
	}

	s = splhigh();	/* to be safe */
	pmap_alloc_level(normal_pdes, pmap_maxkvaddr, PTP_LEVELS,
	    needed_kptp);

	/*
	 * If the number of top level entries changed, update all
	 * pmaps.
	 */
	if (needed_kptp[PTP_LEVELS - 1] != 0) {
		newpdes = nkptp[PTP_LEVELS - 1] - old;
		LIST_FOREACH(pm, &pmaps, pm_list) {
			memcpy(&pm->pm_pdir[PDIR_SLOT_KERN + old],
			    &kpm->pm_pdir[PDIR_SLOT_KERN + old],
			    newpdes * sizeof (pd_entry_t));
		}

		/* Invalidate the PDP cache. */
#if 0
		pool_cache_invalidate(&pmap_pdp_cache);
#endif
	}
	pmap_maxkvaddr = maxkvaddr;
	splx(s);

	return maxkvaddr;
}
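
/*
 * Illustrative sketch (not compiled): how a machine-independent caller
 * is expected to use pmap_growkernel() above.  The pmap grows kernel
 * PTP coverage up to (at least) the requested address and returns the
 * new end of covered kernel VA, rounded up to a PTP boundary.  The
 * function and variable names below are made up for illustration only.
 */
#if 0
void
example_grow_kernel_va(vaddr_t new_kernel_end)
{
	vaddr_t covered;

	covered = pmap_growkernel(new_kernel_end);
	KASSERT(covered >= new_kernel_end);	/* result is rounded up */
}
#endif
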
vaddr_t
pmap_steal_memory(vsize_t size, vaddr_t *start, vaddr_t *end)
{
	int segno;
	u_int npg;
	vaddr_t va;
	paddr_t pa;
	struct vm_physseg *seg;

	size = round_page(size);
	npg = atop(size);

	for (segno = 0, seg = vm_physmem; segno < vm_nphysseg; segno++, seg++) {
		if (seg->avail_end - seg->avail_start < npg)
			continue;
		/*
		 * We can only steal at an ``unused'' segment boundary,
		 * i.e. either at the start or at the end.
		 */
		if (seg->avail_start == seg->start ||
		    seg->avail_end == seg->end)
			break;
	}
	if (segno == vm_nphysseg) {
		panic("pmap_steal_memory: out of memory");
	} else {
		if (seg->avail_start == seg->start) {
			pa = ptoa(seg->avail_start);
			seg->avail_start += npg;
			seg->start += npg;
		} else {
			pa = ptoa(seg->avail_end) - size;
			seg->avail_end -= npg;
			seg->end -= npg;
		}
		/*
		 * If all the segment has been consumed now, remove it.
		 * Note that the crash dump code still knows about it
		 * and will dump it correctly.
		 */
		if (seg->start == seg->end) {
			if (vm_nphysseg-- == 1)
				panic("pmap_steal_memory: out of memory");
			while (segno < vm_nphysseg) {
				seg[0] = seg[1]; /* struct copy */
				seg++;
				segno++;
			}
		}

		va = PMAP_DIRECT_MAP(pa);
		memset((void *)va, 0, size);
	}

	if (start != NULL)
		*start = virtual_avail;
	if (end != NULL)
		*end = VM_MAX_KERNEL_ADDRESS;

	return (va);
}

#ifdef DEBUG
void pmap_dump(struct pmap *, vaddr_t, vaddr_t);

/*
 * pmap_dump: dump all the mappings from a pmap
 *
 * => caller should not be holding any pmap locks
 */

void
pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
{
	pt_entry_t *ptes, *pte;
	pd_entry_t **pdes;
	vaddr_t blkendva;

	/*
	 * if end is out of range, or not past the start,
	 * truncate it to the maximum.
	 */

	if (eva > VM_MAXUSER_ADDRESS || eva <= sva)
		eva = VM_MAXUSER_ADDRESS;

	PMAP_MAP_TO_HEAD_LOCK();
	pmap_map_ptes(pmap, &ptes, &pdes);

	/*
	 * dumping a range of pages: we dump in PTP sized blocks (2MB)
	 */

	for (/* null */ ; sva < eva ; sva = blkendva) {

		/* determine range of block */
		blkendva = x86_round_pdr(sva+1);
		if (blkendva > eva)
			blkendva = eva;

		/* valid block? */
		if (!pmap_pdes_valid(sva, pdes, NULL))
			continue;

		pte = &ptes[pl1_i(sva)];
		for (/* null */; sva < blkendva ; sva += PAGE_SIZE, pte++) {
			if (!pmap_valid_entry(*pte))
				continue;
			printf("va %#lx -> pa %#lx (pte=%#lx)\n",
			    sva, *pte & PG_FRAME, *pte);
		}
	}
	pmap_unmap_ptes(pmap);
	PMAP_MAP_TO_HEAD_UNLOCK();
}
#endif

void
pmap_virtual_space(vaddr_t *vstartp, vaddr_t *vendp)
{
	*vstartp = virtual_avail;
	*vendp = VM_MAX_KERNEL_ADDRESS;
}

#ifdef MULTIPROCESSOR
/*
 * Locking for tlb shootdown.
 *
 * We lock by setting tlb_shoot_wait to the number of cpus that will
 * receive our tlb shootdown.  After sending the IPIs, we don't need to
 * worry about locking order or interrupts spinning for the lock because
 * the call that grabs the "lock" isn't the one that releases it.  And
 * there is nothing that can block the IPI that releases the lock.
 *
 * The functions are organized so that we first count the number of
 * cpus we need to send the IPI to, then we grab the counter, then
 * we send the IPIs, then we finally do our own shootdown.
 *
 * Our shootdown is last to make it parallel with the other cpus
 * to shorten the spin time.
 *
 * Notice that we depend on failures to send IPIs only being able to
 * happen during boot.  If they happen later, the above assumption
 * doesn't hold since we can end up in situations where no one will
 * release the lock if we get an interrupt at a bad moment.
 */

volatile long tlb_shoot_wait;

volatile vaddr_t tlb_shoot_addr1;
volatile vaddr_t tlb_shoot_addr2;

void
pmap_tlb_shootpage(struct pmap *pm, vaddr_t va)
{
	struct cpu_info *ci, *self = curcpu();
	CPU_INFO_ITERATOR cii;
	long wait = 0;
	u_int64_t mask = 0;

	CPU_INFO_FOREACH(cii, ci) {
		if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) ||
		    !(ci->ci_flags & CPUF_RUNNING))
			continue;
		mask |= (1ULL << ci->ci_cpuid);
		wait++;
	}

	if (wait > 0) {
		int s = splvm();

		while (x86_atomic_cas_ul(&tlb_shoot_wait, 0, wait) != 0) {
			while (tlb_shoot_wait != 0)
				SPINLOCK_SPIN_HOOK;
		}
		tlb_shoot_addr1 = va;
		CPU_INFO_FOREACH(cii, ci) {
			if ((mask & (1ULL << ci->ci_cpuid)) == 0)
				continue;
			if (x86_fast_ipi(ci, LAPIC_IPI_INVLPG) != 0)
				panic("pmap_tlb_shootpage: ipi failed");
		}
		splx(s);
	}

	if (pmap_is_curpmap(pm))
		pmap_update_pg(va);
}

void
pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva)
{
	struct cpu_info *ci, *self = curcpu();
	CPU_INFO_ITERATOR cii;
	long wait = 0;
	u_int64_t mask = 0;
	vaddr_t va;

	CPU_INFO_FOREACH(cii, ci) {
		if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) ||
		    !(ci->ci_flags & CPUF_RUNNING))
			continue;
		mask |= (1ULL << ci->ci_cpuid);
		wait++;
	}

	if (wait > 0) {
		int s = splvm();

		while (x86_atomic_cas_ul(&tlb_shoot_wait, 0, wait) != 0) {
			while (tlb_shoot_wait != 0)
				SPINLOCK_SPIN_HOOK;
		}
		tlb_shoot_addr1 = sva;
		tlb_shoot_addr2 = eva;
		CPU_INFO_FOREACH(cii, ci) {
			if ((mask & (1ULL << ci->ci_cpuid)) == 0)
				continue;
			if (x86_fast_ipi(ci, LAPIC_IPI_INVLRANGE) != 0)
				panic("pmap_tlb_shootrange: ipi failed");
		}
		splx(s);
	}

	if (pmap_is_curpmap(pm))
		for (va = sva; va < eva; va += PAGE_SIZE)
			pmap_update_pg(va);
}

void
pmap_tlb_shoottlb(void)
{
	struct cpu_info *ci, *self = curcpu();
	CPU_INFO_ITERATOR cii;
	long wait = 0;
	u_int64_t mask = 0;

	CPU_INFO_FOREACH(cii, ci) {
		if (ci == self || !(ci->ci_flags & CPUF_RUNNING))
			continue;
		mask |= (1ULL << ci->ci_cpuid);
		wait++;
	}

	if (wait) {
		int s = splvm();

		while (x86_atomic_cas_ul(&tlb_shoot_wait, 0, wait) != 0) {
			while (tlb_shoot_wait != 0)
				SPINLOCK_SPIN_HOOK;
		}

		CPU_INFO_FOREACH(cii, ci) {
			if ((mask & (1ULL << ci->ci_cpuid)) == 0)
				continue;
			if (x86_fast_ipi(ci, LAPIC_IPI_INVLTLB) != 0)
				panic("pmap_tlb_shoottlb: ipi failed");
		}
		splx(s);
	}

	tlbflush();
}

void
pmap_tlb_shootwait(void)
{
	while (tlb_shoot_wait != 0)
		SPINLOCK_SPIN_HOOK;
}

#else

void
pmap_tlb_shootpage(struct pmap *pm, vaddr_t va)
{
	if (pmap_is_curpmap(pm))
		pmap_update_pg(va);
}

void
pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva)
{
	vaddr_t va;

	for (va = sva; va < eva; va += PAGE_SIZE)
		pmap_update_pg(va);
}

void
pmap_tlb_shoottlb(void)
{
	tlbflush();
}
#endif /* MULTIPROCESSOR */
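
/*
 * Illustrative sketch (not compiled): the receiving side of the
 * shootdown protocol described above.  The real handlers live in the
 * MD interrupt/IPI code, not in this file; the handler and helper
 * names below are hypothetical.  The key property is that the handler
 * never blocks: it invalidates the requested mapping and atomically
 * decrements tlb_shoot_wait, which is what releases the "lock" taken
 * by the initiating cpu.
 */
#if 0
void
example_ipi_invlpg(void)			/* hypothetical handler */
{
	pmap_update_pg(tlb_shoot_addr1);	/* invlpg on this cpu */

	/*
	 * Release one reference.  The real code uses a locked
	 * decrement; atomic_dec_long() is a stand-in name here.
	 */
	atomic_dec_long(&tlb_shoot_wait);
}
#endif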