1 /* $OpenBSD: pmap.c,v 1.51 2009/08/11 17:15:54 oga Exp $ */ 2 /* $NetBSD: pmap.c,v 1.3 2003/05/08 18:13:13 thorpej Exp $ */ 3 4 /* 5 * 6 * Copyright (c) 1997 Charles D. Cranor and Washington University. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by Charles D. Cranor and 20 * Washington University. 21 * 4. The name of the author may not be used to endorse or promote products 22 * derived from this software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 29 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 30 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 31 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 32 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 33 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /* 37 * Copyright 2001 (c) Wasabi Systems, Inc. 38 * All rights reserved. 39 * 40 * Written by Frank van der Linden for Wasabi Systems, Inc. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. All advertising materials mentioning features or use of this software 51 * must display the following acknowledgement: 52 * This product includes software developed for the NetBSD Project by 53 * Wasabi Systems, Inc. 54 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 55 * or promote products derived from this software without specific prior 56 * written permission. 57 * 58 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 60 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 61 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL WASABI SYSTEMS, INC 62 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 63 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 64 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 65 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 66 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 67 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 68 * POSSIBILITY OF SUCH DAMAGE. 69 */ 70 71 /* 72 * This is the i386 pmap modified and generalized to support x86-64 73 * as well. The idea is to hide the upper N levels of the page tables 74 * inside pmap_get_ptp, pmap_free_ptp and pmap_growkernel. The rest 75 * is mostly untouched, except that it uses some more generalized 76 * macros and interfaces. 77 * 78 * This pmap has been tested on the i386 as well, and it can be easily 79 * adapted to PAE. 80 * 81 * fvdl@wasabisystems.com 18-Jun-2001 82 */ 83 84 /* 85 * pmap.c: i386 pmap module rewrite 86 * Chuck Cranor <chuck@ccrc.wustl.edu> 87 * 11-Aug-97 88 * 89 * history of this pmap module: in addition to my own input, i used 90 * the following references for this rewrite of the i386 pmap: 91 * 92 * [1] the NetBSD i386 pmap. this pmap appears to be based on the 93 * BSD hp300 pmap done by Mike Hibler at University of Utah. 94 * it was then ported to the i386 by William Jolitz of UUNET 95 * Technologies, Inc. Then Charles M. Hannum of the NetBSD 96 * project fixed some bugs and provided some speed ups. 97 * 98 * [2] the FreeBSD i386 pmap. this pmap seems to be the 99 * Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson 100 * and David Greenman. 101 * 102 * [3] the Mach pmap. this pmap, from CMU, seems to have migrated 103 * between several processors. the VAX version was done by 104 * Avadis Tevanian, Jr., and Michael Wayne Young. the i386 105 * version was done by Lance Berc, Mike Kupfer, Bob Baron, 106 * David Golub, and Richard Draves. the alpha version was 107 * done by Alessandro Forin (CMU/Mach) and Chris Demetriou 108 * (NetBSD/alpha). 109 */ 110 111 #include <sys/param.h> 112 #include <sys/systm.h> 113 #include <sys/proc.h> 114 #include <sys/malloc.h> 115 #include <sys/pool.h> 116 #include <sys/user.h> 117 #include <sys/kernel.h> 118 #include <sys/mutex.h> 119 #include <sys/sched.h> 120 121 #include <uvm/uvm.h> 122 123 #include <machine/atomic.h> 124 #include <machine/cpu.h> 125 #include <machine/specialreg.h> 126 #include <machine/gdt.h> 127 128 #include <dev/isa/isareg.h> 129 #include <machine/isa_machdep.h> 130 131 /* 132 * general info: 133 * 134 * - for an explanation of how the i386 MMU hardware works see 135 * the comments in <machine/pte.h>. 136 * 137 * - for an explanation of the general memory structure used by 138 * this pmap (including the recursive mapping), see the comments 139 * in <machine/pmap.h>. 140 * 141 * this file contains the code for the "pmap module." the module's 142 * job is to manage the hardware's virtual to physical address mappings. 143 * note that there are two levels of mapping in the VM system: 144 * 145 * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's 146 * to map ranges of virtual address space to objects/files. for 147 * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only 148 * to the file /bin/ls starting at offset zero." note that 149 * the upper layer mapping is not concerned with how individual 150 * vm_pages are mapped. 
151 * 152 * [2] the lower layer of the VM system (the pmap) maintains the mappings 153 * from virtual addresses. it is concerned with which vm_page is 154 * mapped where. for example, when you run /bin/ls and start 155 * at page 0x1000 the fault routine may lookup the correct page 156 * of the /bin/ls file and then ask the pmap layer to establish 157 * a mapping for it. 158 * 159 * note that information in the lower layer of the VM system can be 160 * thrown away since it can easily be reconstructed from the info 161 * in the upper layer. 162 * 163 * data structures we use include: 164 * 165 * - struct pmap: describes the address space of one thread 166 * - struct pv_entry: describes one <PMAP,VA> mapping of a PA 167 * - pmap_remove_record: a list of virtual addresses whose mappings 168 * have been changed. used for TLB flushing. 169 */ 170 171 /* 172 * memory allocation 173 * 174 * - there are three data structures that we must dynamically allocate: 175 * 176 * [A] new process' page directory page (PDP) 177 * - plan 1: done at pmap_create() we use 178 * uvm_km_alloc(kernel_map, PAGE_SIZE) [fka kmem_alloc] to do this 179 * allocation. 180 * 181 * if we are low in free physical memory then we sleep in 182 * uvm_km_alloc -- in this case this is ok since we are creating 183 * a new pmap and should not be holding any locks. 184 * 185 * if the kernel is totally out of virtual space 186 * (i.e. uvm_km_alloc returns NULL), then we panic. 187 * 188 * XXX: the fork code currently has no way to return an "out of 189 * memory, try again" error code since uvm_fork [fka vm_fork] 190 * is a void function. 191 * 192 * [B] new page tables pages (PTP) 193 * call uvm_pagealloc() 194 * => success: zero page, add to pm_pdir 195 * => failure: we are out of free vm_pages, let pmap_enter() 196 * tell UVM about it. 197 * 198 * note: for kernel PTPs, we start with NKPTP of them. as we map 199 * kernel memory (at uvm_map time) we check to see if we've grown 200 * the kernel pmap. if so, we call the optional function 201 * pmap_growkernel() to grow the kernel PTPs in advance. 202 * 203 * [C] pv_entry structures 204 * - try to allocate one from the pool. 205 * If we fail, we simply let pmap_enter() tell UVM about it. 206 */ 207 208 /* 209 * XXX: would be nice to have per-CPU VAs for the above 4 210 */ 211 212 vaddr_t ptp_masks[] = PTP_MASK_INITIALIZER; 213 int ptp_shifts[] = PTP_SHIFT_INITIALIZER; 214 long nkptp[] = NKPTP_INITIALIZER; 215 long nkptpmax[] = NKPTPMAX_INITIALIZER; 216 long nbpd[] = NBPD_INITIALIZER; 217 pd_entry_t *normal_pdes[] = PDES_INITIALIZER; 218 pd_entry_t *alternate_pdes[] = APDES_INITIALIZER; 219 220 /* int nkpde = NKPTP; */ 221 222 #define PMAP_MAP_TO_HEAD_LOCK() /* null */ 223 #define PMAP_MAP_TO_HEAD_UNLOCK() /* null */ 224 225 #define PMAP_HEAD_TO_MAP_LOCK() /* null */ 226 #define PMAP_HEAD_TO_MAP_UNLOCK() /* null */ 227 228 #define COUNT(x) /* nothing */ 229 230 /* 231 * global data structures 232 */ 233 234 struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */ 235 236 /* 237 * pmap_pg_g: if our processor supports PG_G in the PTE then we 238 * set pmap_pg_g to PG_G (otherwise it is zero). 239 */ 240 241 int pmap_pg_g = 0; 242 243 /* 244 * i386 physical memory comes in a big contig chunk with a small 245 * hole toward the front of it... the following 4 paddr_t's 246 * (shared with machdep.c) describe the physical address space 247 * of this machine. 
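 *	(only avail_start and avail_end are actually declared below on
 *	this port.)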
248 */ 249 paddr_t avail_start; /* PA of first available physical page */ 250 paddr_t avail_end; /* PA of last available physical page */ 251 252 /* 253 * other data structures 254 */ 255 256 pt_entry_t protection_codes[8]; /* maps MI prot to i386 prot code */ 257 boolean_t pmap_initialized = FALSE; /* pmap_init done yet? */ 258 259 /* 260 * pv management structures. 261 */ 262 struct pool pmap_pv_pool; 263 264 /* 265 * linked list of all non-kernel pmaps 266 */ 267 268 struct pmap_head pmaps; 269 270 /* 271 * pool that pmap structures are allocated from 272 */ 273 274 struct pool pmap_pmap_pool; 275 276 /* 277 * When we're freeing a ptp, we need to delay the freeing until all 278 * tlb shootdown has been done. This is the list of the to-be-freed pages. 279 */ 280 TAILQ_HEAD(pg_to_free, vm_page); 281 282 /* 283 * pool that PDPs are allocated from 284 */ 285 286 struct pool pmap_pdp_pool; 287 u_int pmap_pdp_cache_generation; 288 289 int pmap_pdp_ctor(void *, void *, int); 290 291 extern vaddr_t msgbuf_vaddr; 292 extern paddr_t msgbuf_paddr; 293 294 extern vaddr_t idt_vaddr; /* we allocate IDT early */ 295 extern paddr_t idt_paddr; 296 297 #ifdef _LP64 298 extern vaddr_t lo32_vaddr; 299 extern vaddr_t lo32_paddr; 300 #endif 301 302 vaddr_t virtual_avail; 303 extern int end; 304 305 /* 306 * local prototypes 307 */ 308 309 void pmap_enter_pv(struct vm_page *, struct pv_entry *, struct pmap *, 310 vaddr_t, struct vm_page *); 311 struct vm_page *pmap_get_ptp(struct pmap *, vaddr_t, pd_entry_t **); 312 struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, paddr_t, int); 313 void pmap_free_ptp(struct pmap *, struct vm_page *, 314 vaddr_t, pt_entry_t *, pd_entry_t **, struct pg_to_free *); 315 void pmap_freepage(struct pmap *, struct vm_page *, int, struct pg_to_free *); 316 static boolean_t pmap_is_active(struct pmap *, int); 317 void pmap_map_ptes(struct pmap *, pt_entry_t **, pd_entry_t ***); 318 struct pv_entry *pmap_remove_pv(struct vm_page *, struct pmap *, vaddr_t); 319 void pmap_do_remove(struct pmap *, vaddr_t, vaddr_t, int); 320 boolean_t pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *, 321 vaddr_t, int); 322 void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t, 323 vaddr_t, vaddr_t, int); 324 #define PMAP_REMOVE_ALL 0 /* remove all mappings */ 325 #define PMAP_REMOVE_SKIPWIRED 1 /* skip wired mappings */ 326 327 void pmap_unmap_ptes(struct pmap *); 328 boolean_t pmap_get_physpage(vaddr_t, int, paddr_t *); 329 boolean_t pmap_pdes_valid(vaddr_t, pd_entry_t **, pd_entry_t *); 330 void pmap_alloc_level(pd_entry_t **, vaddr_t, int, long *); 331 void pmap_apte_flush(struct pmap *pmap); 332 333 void pmap_sync_flags_pte(struct vm_page *, u_long); 334 335 /* 336 * p m a p i n l i n e h e l p e r f u n c t i o n s 337 */ 338 339 /* 340 * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]? 341 * of course the kernel is always loaded 342 */ 343 344 static __inline boolean_t 345 pmap_is_curpmap(struct pmap *pmap) 346 { 347 return((pmap == pmap_kernel()) || 348 (pmap->pm_pdirpa == (paddr_t) rcr3())); 349 } 350 351 /* 352 * pmap_is_active: is this pmap loaded into the specified processor's %cr3? 353 */ 354 355 static __inline boolean_t 356 pmap_is_active(struct pmap *pmap, int cpu_id) 357 { 358 return (pmap == pmap_kernel() || 359 (pmap->pm_cpus & (1U << cpu_id)) != 0); 360 } 361 362 static __inline u_int 363 pmap_pte2flags(u_long pte) 364 { 365 return (((pte & PG_U) ? PG_PMAP_REF : 0) | 366 ((pte & PG_M) ? 
PG_PMAP_MOD : 0)); 367 } 368 369 void 370 pmap_sync_flags_pte(struct vm_page *pg, u_long pte) 371 { 372 if (pte & (PG_U|PG_M)) { 373 atomic_setbits_int(&pg->pg_flags, pmap_pte2flags(pte)); 374 } 375 } 376 377 void 378 pmap_apte_flush(struct pmap *pmap) 379 { 380 pmap_tlb_shoottlb(); 381 pmap_tlb_shootwait(); 382 } 383 384 /* 385 * pmap_map_ptes: map a pmap's PTEs into KVM 386 * 387 * => we lock enough pmaps to keep things locked in 388 * => must be undone with pmap_unmap_ptes before returning 389 */ 390 391 void 392 pmap_map_ptes(struct pmap *pmap, pt_entry_t **ptepp, pd_entry_t ***pdeppp) 393 { 394 pd_entry_t opde, npde; 395 396 /* if curpmap then we are always mapped */ 397 if (pmap_is_curpmap(pmap)) { 398 *ptepp = PTE_BASE; 399 *pdeppp = normal_pdes; 400 return; 401 } 402 403 /* need to load a new alternate pt space into curpmap? */ 404 opde = *APDP_PDE; 405 if (!pmap_valid_entry(opde) || (opde & PG_FRAME) != pmap->pm_pdirpa) { 406 npde = (pd_entry_t) (pmap->pm_pdirpa | PG_RW | PG_V); 407 *APDP_PDE = npde; 408 if (pmap_valid_entry(opde)) 409 pmap_apte_flush(curpcb->pcb_pmap); 410 } 411 *ptepp = APTE_BASE; 412 *pdeppp = alternate_pdes; 413 } 414 415 void 416 pmap_unmap_ptes(struct pmap *pmap) 417 { 418 if (pmap_is_curpmap(pmap)) 419 return; 420 421 #if defined(MULTIPROCESSOR) 422 *APDP_PDE = 0; 423 pmap_apte_flush(curpcb->pcb_pmap); 424 #endif 425 COUNT(apdp_pde_unmap); 426 } 427 428 /* 429 * p m a p k e n t e r f u n c t i o n s 430 * 431 * functions to quickly enter/remove pages from the kernel address 432 * space. pmap_kremove is exported to MI kernel. we make use of 433 * the recursive PTE mappings. 434 */ 435 436 /* 437 * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking 438 * 439 * => no need to lock anything, assume va is already allocated 440 * => should be faster than normal pmap enter function 441 */ 442 443 void 444 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot) 445 { 446 pt_entry_t *pte, opte, npte; 447 448 pte = kvtopte(va); 449 450 npte = (pa & PMAP_PA_MASK) | ((prot & VM_PROT_WRITE) ? PG_RW : PG_RO) | 451 ((pa & PMAP_NOCACHE) ? PG_N : 0) | PG_V; 452 453 /* special 1:1 mappings in the first 2MB must not be global */ 454 if (va >= (vaddr_t)NBPD_L2) 455 npte |= pmap_pg_g; 456 457 if ((cpu_feature & CPUID_NXE) && !(prot & VM_PROT_EXECUTE)) 458 npte |= PG_NX; 459 opte = pmap_pte_set(pte, npte); 460 #ifdef LARGEPAGES 461 /* XXX For now... 
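	   (large pages are never entered through pmap_kenter_pa, so an old
	   PTE with PG_PS set is treated as a fatal error here)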
 */
	if (opte & PG_PS)
		panic("pmap_kenter_pa: PG_PS");
#endif
	if (pmap_valid_entry(opte)) {
		if (pa & PMAP_NOCACHE && (opte & PG_N) == 0)
			wbinvd();
		/* This shouldn't happen */
		pmap_tlb_shootpage(pmap_kernel(), va);
		pmap_tlb_shootwait();
	}
}

/*
 * pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking
 *
 * => no need to lock anything
 * => caller must dispose of any vm_page mapped in the va range
 * => note: not an inline function
 * => we assume the va is page aligned and the len is a multiple of PAGE_SIZE
 * => we assume kernel only unmaps valid addresses and thus don't bother
 *    checking the valid bit before doing TLB flushing
 */

void
pmap_kremove(vaddr_t sva, vsize_t len)
{
	pt_entry_t *pte, opte;
	vaddr_t va, eva;

	eva = sva + len;

	for (va = sva; va != eva; va += PAGE_SIZE) {
		pte = kvtopte(va);

		opte = pmap_pte_set(pte, 0);
#ifdef LARGEPAGES
		KASSERT((opte & PG_PS) == 0);
#endif
		KASSERT((opte & PG_PVLIST) == 0);
	}

	pmap_tlb_shootrange(pmap_kernel(), sva, eva);
	pmap_tlb_shootwait();
}

/*
 * p m a p   i n i t   f u n c t i o n s
 *
 * pmap_bootstrap and pmap_init are called during system startup
 * to init the pmap module. pmap_bootstrap() does a low level
 * init just to get things rolling. pmap_init() finishes the job.
 */

/*
 * pmap_bootstrap: get the system in a state where it can run with VM
 *	properly enabled (called before main()). the VM system is
 *	fully init'd later...
 *
 * => on i386, locore.s has already enabled the MMU by allocating
 *	a PDP for the kernel, and nkpde PTP's for the kernel.
 * => kva_start is the first free virtual address in kernel space
 */

paddr_t
pmap_bootstrap(paddr_t first_avail, paddr_t max_pa)
{
	vaddr_t kva, kva_end, kva_start = VM_MIN_KERNEL_ADDRESS;
	struct pmap *kpm;
	int i;
	unsigned long p1i;
	pt_entry_t pg_nx = (cpu_feature & CPUID_NXE? PG_NX : 0);
	long ndmpdp;
	paddr_t dmpd, dmpdp;

	/*
	 * define the boundaries of the managed kernel virtual address
	 * space.
	 */

	virtual_avail = kva_start;		/* first free KVA */

	/*
	 * set up protection_codes: we need to be able to convert from
	 * a MI protection code (some combo of VM_PROT...) to something
	 * we can jam into an i386 PTE.
	 */

	protection_codes[VM_PROT_NONE] = pg_nx;			/* --- */
	protection_codes[VM_PROT_EXECUTE] = PG_RO;		/* --x */
	protection_codes[VM_PROT_READ] = PG_RO | pg_nx;		/* -r- */
	protection_codes[VM_PROT_READ|VM_PROT_EXECUTE] = PG_RO;	/* -rx */
	protection_codes[VM_PROT_WRITE] = PG_RW | pg_nx;	/* w-- */
	protection_codes[VM_PROT_WRITE|VM_PROT_EXECUTE] = PG_RW;/* w-x */
	protection_codes[VM_PROT_WRITE|VM_PROT_READ] = PG_RW | pg_nx;
								/* wr- */
	protection_codes[VM_PROT_ALL] = PG_RW;			/* wrx */

	/*
	 * now we init the kernel's pmap
	 *
	 * the kernel pmap's pm_obj is not used for much. however, in
	 * user pmaps the pm_obj contains the list of active PTPs.
	 * the pm_obj currently does not have a pager. it might be possible
	 * to add a pager that would allow a process to read-only mmap its
	 * own page tables (fast user level vtophys?). this may or may not
	 * be useful.
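	 * (the loop below just fills in each pm_obj and the matching PTP
	 * hint by hand; there is no pager to go with them.)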
568 */ 569 570 kpm = pmap_kernel(); 571 for (i = 0; i < PTP_LEVELS - 1; i++) { 572 kpm->pm_obj[i].pgops = NULL; 573 RB_INIT(&kpm->pm_obj[i].memt); 574 kpm->pm_obj[i].uo_npages = 0; 575 kpm->pm_obj[i].uo_refs = 1; 576 kpm->pm_ptphint[i] = NULL; 577 } 578 memset(&kpm->pm_list, 0, sizeof(kpm->pm_list)); /* pm_list not used */ 579 kpm->pm_pdir = (pd_entry_t *)(proc0.p_addr->u_pcb.pcb_cr3 + KERNBASE); 580 kpm->pm_pdirpa = proc0.p_addr->u_pcb.pcb_cr3; 581 kpm->pm_stats.wired_count = kpm->pm_stats.resident_count = 582 atop(kva_start - VM_MIN_KERNEL_ADDRESS); 583 584 /* 585 * the above is just a rough estimate and not critical to the proper 586 * operation of the system. 587 */ 588 589 curpcb->pcb_pmap = kpm; /* proc0's pcb */ 590 591 /* 592 * enable global TLB entries. 593 */ 594 pmap_pg_g = PG_G; /* enable software */ 595 596 /* add PG_G attribute to already mapped kernel pages */ 597 #if KERNBASE == VM_MIN_KERNEL_ADDRESS 598 for (kva = VM_MIN_KERNEL_ADDRESS ; kva < virtual_avail ; 599 #else 600 kva_end = roundup((vaddr_t)&end, PAGE_SIZE); 601 for (kva = KERNBASE; kva < kva_end ; 602 #endif 603 kva += PAGE_SIZE) { 604 p1i = pl1_i(kva); 605 if (pmap_valid_entry(PTE_BASE[p1i])) 606 PTE_BASE[p1i] |= PG_G; 607 } 608 609 /* 610 * Map the direct map. The first 4GB were mapped in locore, here 611 * we map the rest if it exists. We actually use the direct map 612 * here to set up the page tables, we're assuming that we're still 613 * operating in the lower 4GB of memory. 614 */ 615 ndmpdp = (max_pa + NBPD_L3 - 1) >> L3_SHIFT; 616 if (ndmpdp < NDML2_ENTRIES) 617 ndmpdp = NDML2_ENTRIES; /* At least 4GB */ 618 619 dmpdp = kpm->pm_pdir[PDIR_SLOT_DIRECT] & PG_FRAME; 620 621 dmpd = first_avail; first_avail += ndmpdp * PAGE_SIZE; 622 623 for (i = NDML2_ENTRIES; i < NPDPG * ndmpdp; i++) { 624 paddr_t pdp; 625 vaddr_t va; 626 627 pdp = (paddr_t)&(((pd_entry_t *)dmpd)[i]); 628 va = PMAP_DIRECT_MAP(pdp); 629 630 *((pd_entry_t *)va) = ((paddr_t)i << L2_SHIFT); 631 *((pd_entry_t *)va) |= PG_RW | PG_V | PG_PS | PG_G | PG_U | 632 PG_M; 633 } 634 635 for (i = NDML2_ENTRIES; i < ndmpdp; i++) { 636 paddr_t pdp; 637 vaddr_t va; 638 639 pdp = (paddr_t)&(((pd_entry_t *)dmpdp)[i]); 640 va = PMAP_DIRECT_MAP(pdp); 641 642 *((pd_entry_t *)va) = dmpd + (i << PAGE_SHIFT); 643 *((pd_entry_t *)va) |= PG_RW | PG_V | PG_U | PG_M; 644 } 645 646 kpm->pm_pdir[PDIR_SLOT_DIRECT] = dmpdp | PG_V | PG_KW | PG_U | 647 PG_M; 648 649 tlbflush(); 650 651 msgbuf_vaddr = virtual_avail; 652 virtual_avail += round_page(MSGBUFSIZE); 653 654 idt_vaddr = virtual_avail; 655 virtual_avail += 2 * PAGE_SIZE; 656 idt_paddr = first_avail; /* steal a page */ 657 first_avail += 2 * PAGE_SIZE; 658 659 #ifdef _LP64 660 /* 661 * Grab a page below 4G for things that need it (i.e. 662 * having an initial %cr3 for the MP trampoline). 663 */ 664 lo32_vaddr = virtual_avail; 665 virtual_avail += PAGE_SIZE; 666 lo32_paddr = first_avail; 667 first_avail += PAGE_SIZE; 668 #endif 669 670 /* 671 * now we reserve some VM for mapping pages when doing a crash dump 672 */ 673 674 virtual_avail = reserve_dumppages(virtual_avail); 675 676 /* 677 * init the global lists. 678 */ 679 LIST_INIT(&pmaps); 680 681 /* 682 * initialize the pmap pool. 683 */ 684 685 pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl", 686 &pool_allocator_nointr); 687 pool_init(&pmap_pv_pool, sizeof(struct pv_entry), 0, 0, 0, "pvpl", 688 &pool_allocator_nointr); 689 pool_sethiwat(&pmap_pv_pool, 32 * 1024); 690 691 /* 692 * initialize the PDE pool. 
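 * each item is one page-sized PDP, set up by pmap_pdp_ctor below and
 * later handed out to pmap_create() with, roughly,
 *
 *	pmap->pm_pdir = pool_get(&pmap_pdp_pool, PR_WAITOK);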
693 */ 694 695 pool_init(&pmap_pdp_pool, PAGE_SIZE, 0, 0, 0, "pdppl", 696 &pool_allocator_nointr); 697 pool_set_ctordtor(&pmap_pdp_pool, pmap_pdp_ctor, NULL, NULL); 698 699 700 /* 701 * ensure the TLB is sync'd with reality by flushing it... 702 */ 703 704 tlbflush(); 705 706 return first_avail; 707 } 708 709 /* 710 * Pre-allocate PTPs for low memory, so that 1:1 mappings for various 711 * trampoline code can be entered. 712 */ 713 paddr_t 714 pmap_prealloc_lowmem_ptps(paddr_t first_avail) 715 { 716 pd_entry_t *pdes; 717 int level; 718 paddr_t newp; 719 720 pdes = pmap_kernel()->pm_pdir; 721 level = PTP_LEVELS; 722 for (;;) { 723 newp = first_avail; first_avail += PAGE_SIZE; 724 memset((void *)PMAP_DIRECT_MAP(newp), 0, PAGE_SIZE); 725 pdes[pl_i(0, level)] = (newp & PG_FRAME) | PG_V | PG_RW; 726 level--; 727 if (level <= 1) 728 break; 729 pdes = normal_pdes[level - 2]; 730 } 731 732 return first_avail; 733 } 734 735 /* 736 * pmap_init: called from uvm_init, our job is to get the pmap 737 * system ready to manage mappings... this mainly means initing 738 * the pv_entry stuff. 739 */ 740 741 void 742 pmap_init(void) 743 { 744 /* 745 * done: pmap module is up (and ready for business) 746 */ 747 748 pmap_initialized = TRUE; 749 } 750 751 /* 752 * p v _ e n t r y f u n c t i o n s 753 */ 754 755 /* 756 * main pv_entry manipulation functions: 757 * pmap_enter_pv: enter a mapping onto a pv list 758 * pmap_remove_pv: remove a mapping from a pv list 759 */ 760 761 /* 762 * pmap_enter_pv: enter a mapping onto a pv list 763 * 764 * => caller should adjust ptp's wire_count before calling 765 * 766 * pve: preallocated pve for us to use 767 * ptp: PTP in pmap that maps this VA 768 */ 769 770 void 771 pmap_enter_pv(struct vm_page *pg, struct pv_entry *pve, struct pmap *pmap, 772 vaddr_t va, struct vm_page *ptp) 773 { 774 pve->pv_pmap = pmap; 775 pve->pv_va = va; 776 pve->pv_ptp = ptp; /* NULL for kernel pmap */ 777 pve->pv_next = pg->mdpage.pv_list; /* add to ... */ 778 pg->mdpage.pv_list = pve; /* ... list */ 779 } 780 781 /* 782 * pmap_remove_pv: try to remove a mapping from a pv_list 783 * 784 * => caller should adjust ptp's wire_count and free PTP if needed 785 * => we return the removed pve 786 */ 787 788 struct pv_entry * 789 pmap_remove_pv(struct vm_page *pg, struct pmap *pmap, vaddr_t va) 790 { 791 struct pv_entry *pve, **prevptr; 792 793 prevptr = &pg->mdpage.pv_list; 794 while ((pve = *prevptr) != NULL) { 795 if (pve->pv_pmap == pmap && pve->pv_va == va) { /* match? */ 796 *prevptr = pve->pv_next; /* remove it! 
*/ 797 break; 798 } 799 prevptr = &pve->pv_next; /* previous pointer */ 800 } 801 return(pve); /* return removed pve */ 802 } 803 804 /* 805 * p t p f u n c t i o n s 806 */ 807 808 struct vm_page * 809 pmap_find_ptp(struct pmap *pmap, vaddr_t va, paddr_t pa, int level) 810 { 811 int lidx = level - 1; 812 struct vm_page *pg; 813 814 if (pa != (paddr_t)-1 && pmap->pm_ptphint[lidx] && 815 pa == VM_PAGE_TO_PHYS(pmap->pm_ptphint[lidx])) { 816 return (pmap->pm_ptphint[lidx]); 817 } 818 if (lidx == 0) 819 pg = uvm_pagelookup(&pmap->pm_obj[lidx], ptp_va2o(va, level)); 820 else { 821 pg = uvm_pagelookup(&pmap->pm_obj[lidx], ptp_va2o(va, level)); 822 } 823 return pg; 824 } 825 826 void 827 pmap_freepage(struct pmap *pmap, struct vm_page *ptp, int level, 828 struct pg_to_free *pagelist) 829 { 830 int lidx; 831 struct uvm_object *obj; 832 833 lidx = level - 1; 834 835 obj = &pmap->pm_obj[lidx]; 836 pmap->pm_stats.resident_count--; 837 if (pmap->pm_ptphint[lidx] == ptp) 838 pmap->pm_ptphint[lidx] = RB_ROOT(&obj->memt); 839 ptp->wire_count = 0; 840 uvm_pagerealloc(ptp, NULL, 0); 841 TAILQ_INSERT_TAIL(pagelist, ptp, pageq); 842 } 843 844 void 845 pmap_free_ptp(struct pmap *pmap, struct vm_page *ptp, vaddr_t va, 846 pt_entry_t *ptes, pd_entry_t **pdes, struct pg_to_free *pagelist) 847 { 848 unsigned long index; 849 int level; 850 vaddr_t invaladdr; 851 pd_entry_t opde; 852 853 level = 1; 854 do { 855 pmap_freepage(pmap, ptp, level, pagelist); 856 index = pl_i(va, level + 1); 857 opde = pmap_pte_set(&pdes[level - 1][index], 0); 858 invaladdr = level == 1 ? (vaddr_t)ptes : 859 (vaddr_t)pdes[level - 2]; 860 pmap_tlb_shootpage(curpcb->pcb_pmap, 861 invaladdr + index * PAGE_SIZE); 862 #if defined(MULTIPROCESSOR) 863 invaladdr = level == 1 ? (vaddr_t)PTE_BASE : 864 (vaddr_t)normal_pdes[level - 2]; 865 pmap_tlb_shootpage(pmap, invaladdr + index * PAGE_SIZE); 866 #endif 867 if (level < PTP_LEVELS - 1) { 868 ptp = pmap_find_ptp(pmap, va, (paddr_t)-1, level + 1); 869 ptp->wire_count--; 870 if (ptp->wire_count > 1) 871 break; 872 } 873 } while (++level < PTP_LEVELS); 874 } 875 876 /* 877 * pmap_get_ptp: get a PTP (if there isn't one, allocate a new one) 878 * 879 * => pmap should NOT be pmap_kernel() 880 */ 881 882 883 struct vm_page * 884 pmap_get_ptp(struct pmap *pmap, vaddr_t va, pd_entry_t **pdes) 885 { 886 struct vm_page *ptp, *pptp; 887 int i; 888 unsigned long index; 889 pd_entry_t *pva; 890 paddr_t ppa, pa; 891 struct uvm_object *obj; 892 893 ptp = NULL; 894 pa = (paddr_t)-1; 895 896 /* 897 * Loop through all page table levels seeing if we need to 898 * add a new page to that level. 899 */ 900 for (i = PTP_LEVELS; i > 1; i--) { 901 /* 902 * Save values from previous round. 903 */ 904 pptp = ptp; 905 ppa = pa; 906 907 index = pl_i(va, i); 908 pva = pdes[i - 2]; 909 910 if (pmap_valid_entry(pva[index])) { 911 ppa = pva[index] & PG_FRAME; 912 ptp = NULL; 913 continue; 914 } 915 916 obj = &pmap->pm_obj[i-2]; 917 ptp = uvm_pagealloc(obj, ptp_va2o(va, i - 1), NULL, 918 UVM_PGA_USERESERVE|UVM_PGA_ZERO); 919 920 if (ptp == NULL) 921 return NULL; 922 923 atomic_clearbits_int(&ptp->pg_flags, PG_BUSY); 924 ptp->wire_count = 1; 925 pmap->pm_ptphint[i - 2] = ptp; 926 pa = VM_PAGE_TO_PHYS(ptp); 927 pva[index] = (pd_entry_t) (pa | PG_u | PG_RW | PG_V); 928 pmap->pm_stats.resident_count++; 929 /* 930 * If we're not in the top level, increase the 931 * wire count of the parent page. 
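		 * (a PTP's wire_count is one for the page itself plus one
		 * per valid entry, so the parent gains a reference for the
		 * PDE just installed above.)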
932 */ 933 if (i < PTP_LEVELS) { 934 if (pptp == NULL) 935 pptp = pmap_find_ptp(pmap, va, ppa, i); 936 #ifdef DIAGNOSTIC 937 if (pptp == NULL) 938 panic("pde page disappeared"); 939 #endif 940 pptp->wire_count++; 941 } 942 } 943 944 /* 945 * ptp is not NULL if we just allocated a new ptp. If it's 946 * still NULL, we must look up the existing one. 947 */ 948 if (ptp == NULL) { 949 ptp = pmap_find_ptp(pmap, va, ppa, 1); 950 #ifdef DIAGNOSTIC 951 if (ptp == NULL) { 952 printf("va %lx ppa %lx\n", (unsigned long)va, 953 (unsigned long)ppa); 954 panic("pmap_get_ptp: unmanaged user PTP"); 955 } 956 #endif 957 } 958 959 pmap->pm_ptphint[0] = ptp; 960 return(ptp); 961 } 962 963 /* 964 * p m a p l i f e c y c l e f u n c t i o n s 965 */ 966 967 /* 968 * pmap_pdp_ctor: constructor for the PDP cache. 969 */ 970 971 int 972 pmap_pdp_ctor(void *arg, void *object, int flags) 973 { 974 pd_entry_t *pdir = object; 975 paddr_t pdirpa; 976 int npde; 977 978 /* fetch the physical address of the page directory. */ 979 (void) pmap_extract(pmap_kernel(), (vaddr_t) pdir, &pdirpa); 980 981 /* zero init area */ 982 memset(pdir, 0, PDIR_SLOT_PTE * sizeof(pd_entry_t)); 983 984 /* put in recursive PDE to map the PTEs */ 985 pdir[PDIR_SLOT_PTE] = pdirpa | PG_V | PG_KW; 986 987 npde = nkptp[PTP_LEVELS - 1]; 988 989 /* put in kernel VM PDEs */ 990 memcpy(&pdir[PDIR_SLOT_KERN], &PDP_BASE[PDIR_SLOT_KERN], 991 npde * sizeof(pd_entry_t)); 992 993 /* zero the rest */ 994 memset(&pdir[PDIR_SLOT_KERN + npde], 0, 995 (NTOPLEVEL_PDES - (PDIR_SLOT_KERN + npde)) * sizeof(pd_entry_t)); 996 997 pdir[PDIR_SLOT_DIRECT] = pmap_kernel()->pm_pdir[PDIR_SLOT_DIRECT]; 998 999 #if VM_MIN_KERNEL_ADDRESS != KERNBASE 1000 pdir[pl4_pi(KERNBASE)] = PDP_BASE[pl4_pi(KERNBASE)]; 1001 #endif 1002 1003 return (0); 1004 } 1005 1006 /* 1007 * pmap_create: create a pmap 1008 * 1009 * => note: old pmap interface took a "size" args which allowed for 1010 * the creation of "software only" pmaps (not in bsd). 1011 */ 1012 1013 struct pmap * 1014 pmap_create(void) 1015 { 1016 struct pmap *pmap; 1017 int i; 1018 u_int gen; 1019 1020 pmap = pool_get(&pmap_pmap_pool, PR_WAITOK); 1021 1022 /* init uvm_object */ 1023 for (i = 0; i < PTP_LEVELS - 1; i++) { 1024 pmap->pm_obj[i].pgops = NULL; /* not a mappable object */ 1025 RB_INIT(&pmap->pm_obj[i].memt); 1026 pmap->pm_obj[i].uo_npages = 0; 1027 pmap->pm_obj[i].uo_refs = 1; 1028 pmap->pm_ptphint[i] = NULL; 1029 } 1030 pmap->pm_stats.wired_count = 0; 1031 pmap->pm_stats.resident_count = 1; /* count the PDP allocd below */ 1032 pmap->pm_flags = 0; 1033 1034 /* init the LDT */ 1035 pmap->pm_ldt = NULL; 1036 pmap->pm_ldt_len = 0; 1037 pmap->pm_ldt_sel = GSYSSEL(GLDT_SEL, SEL_KPL); 1038 1039 /* allocate PDP */ 1040 1041 /* 1042 * note that there is no need to splvm to protect us from 1043 * malloc since malloc allocates out of a submap and we should 1044 * have already allocated kernel PTPs to cover the range... 1045 */ 1046 1047 try_again: 1048 gen = pmap_pdp_cache_generation; 1049 pmap->pm_pdir = pool_get(&pmap_pdp_pool, PR_WAITOK); 1050 1051 if (gen != pmap_pdp_cache_generation) { 1052 pool_put(&pmap_pdp_pool, pmap->pm_pdir); 1053 goto try_again; 1054 } 1055 1056 pmap->pm_pdirpa = pmap->pm_pdir[PDIR_SLOT_PTE] & PG_FRAME; 1057 1058 LIST_INSERT_HEAD(&pmaps, pmap, pm_list); 1059 return (pmap); 1060 } 1061 1062 /* 1063 * pmap_destroy: drop reference count on pmap. free pmap if 1064 * reference count goes to zero. 
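 * (the reference count lives in pm_obj[0].uo_refs; pmap_reference()
 * below is the matching increment.)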
1065 */ 1066 1067 void 1068 pmap_destroy(struct pmap *pmap) 1069 { 1070 struct vm_page *pg; 1071 int refs; 1072 int i; 1073 1074 /* 1075 * drop reference count 1076 */ 1077 1078 refs = --pmap->pm_obj[0].uo_refs; 1079 if (refs > 0) { 1080 return; 1081 } 1082 1083 /* 1084 * reference count is zero, free pmap resources and then free pmap. 1085 */ 1086 1087 /* 1088 * remove it from global list of pmaps 1089 */ 1090 LIST_REMOVE(pmap, pm_list); 1091 1092 /* 1093 * free any remaining PTPs 1094 */ 1095 1096 for (i = 0; i < PTP_LEVELS - 1; i++) { 1097 while ((pg = RB_ROOT(&pmap->pm_obj[i].memt)) != NULL) { 1098 KASSERT((pg->pg_flags & PG_BUSY) == 0); 1099 1100 pg->wire_count = 0; 1101 uvm_pagefree(pg); 1102 } 1103 } 1104 1105 /* 1106 * MULTIPROCESSOR -- no need to flush out of other processors' 1107 * APTE space because we do that in pmap_unmap_ptes(). 1108 */ 1109 /* XXX: need to flush it out of other processor's APTE space? */ 1110 pool_put(&pmap_pdp_pool, pmap->pm_pdir); 1111 1112 pool_put(&pmap_pmap_pool, pmap); 1113 } 1114 1115 /* 1116 * Add a reference to the specified pmap. 1117 */ 1118 1119 void 1120 pmap_reference(struct pmap *pmap) 1121 { 1122 pmap->pm_obj[0].uo_refs++; 1123 } 1124 1125 /* 1126 * pmap_activate: activate a process' pmap (fill in %cr3 and LDT info) 1127 * 1128 * => called from cpu_switch() 1129 * => if p is the curproc, then load it into the MMU 1130 */ 1131 1132 void 1133 pmap_activate(struct proc *p) 1134 { 1135 struct pcb *pcb = &p->p_addr->u_pcb; 1136 struct pmap *pmap = p->p_vmspace->vm_map.pmap; 1137 1138 pcb->pcb_pmap = pmap; 1139 pcb->pcb_ldt_sel = pmap->pm_ldt_sel; 1140 pcb->pcb_cr3 = pmap->pm_pdirpa; 1141 if (p == curproc) 1142 lcr3(pcb->pcb_cr3); 1143 if (pcb == curpcb) 1144 lldt(pcb->pcb_ldt_sel); 1145 1146 /* 1147 * mark the pmap in use by this processor. 1148 */ 1149 x86_atomic_setbits_ul(&pmap->pm_cpus, (1U << cpu_number())); 1150 } 1151 1152 /* 1153 * pmap_deactivate: deactivate a process' pmap 1154 */ 1155 1156 void 1157 pmap_deactivate(struct proc *p) 1158 { 1159 struct pmap *pmap = p->p_vmspace->vm_map.pmap; 1160 1161 /* 1162 * mark the pmap no longer in use by this processor. 1163 */ 1164 x86_atomic_clearbits_ul(&pmap->pm_cpus, (1U << cpu_number())); 1165 1166 } 1167 1168 /* 1169 * end of lifecycle functions 1170 */ 1171 1172 /* 1173 * some misc. 
   functions
 */

boolean_t
pmap_pdes_valid(vaddr_t va, pd_entry_t **pdes, pd_entry_t *lastpde)
{
	int i;
	unsigned long index;
	pd_entry_t pde;

	for (i = PTP_LEVELS; i > 1; i--) {
		index = pl_i(va, i);
		pde = pdes[i - 2][index];
		if ((pde & PG_V) == 0)
			return FALSE;
	}
	if (lastpde != NULL)
		*lastpde = pde;
	return TRUE;
}

/*
 * pmap_extract: extract a PA for the given VA
 */

boolean_t
pmap_extract(struct pmap *pmap, vaddr_t va, paddr_t *pap)
{
	pt_entry_t *ptes, pte;
	pd_entry_t pde, **pdes;

	if (pmap == pmap_kernel() && va >= PMAP_DIRECT_BASE &&
	    va < PMAP_DIRECT_END) {
		*pap = va - PMAP_DIRECT_BASE;
		return (TRUE);
	}

	pmap_map_ptes(pmap, &ptes, &pdes);
	if (pmap_pdes_valid(va, pdes, &pde) == FALSE) {
		pmap_unmap_ptes(pmap);
		return FALSE;
	}

	if (pde & PG_PS) {
		if (pap != NULL)
			*pap = (pde & PG_LGFRAME) | (va & 0x1fffff);
		pmap_unmap_ptes(pmap);
		return (TRUE);
	}

	pte = ptes[pl1_i(va)];
	pmap_unmap_ptes(pmap);

	if (__predict_true((pte & PG_V) != 0)) {
		if (pap != NULL)
			*pap = (pte & PG_FRAME) | (va & 0xfff);
		return (TRUE);
	}

	return FALSE;
}

/*
 * pmap_map: map a range of PAs into kvm
 *
 * => used during crash dump
 * => XXX: pmap_map() should be phased out?
 */

vaddr_t
pmap_map(vaddr_t va, paddr_t spa, paddr_t epa, vm_prot_t prot)
{
	while (spa < epa) {
		pmap_enter(pmap_kernel(), va, spa, prot, 0);
		va += PAGE_SIZE;
		spa += PAGE_SIZE;
	}
	pmap_update(pmap_kernel());
	return va;
}

/*
 * pmap_zero_page: zero a page
 */

void
pmap_zero_page(struct vm_page *pg)
{
	pagezero(pmap_map_direct(pg));
}

/*
 * pmap_pageidlezero: the same, for the idle loop page zero'er.
 * Returns TRUE if the page was zero'd, FALSE if we aborted for
 * some reason.
 */

boolean_t
pmap_pageidlezero(struct vm_page *pg)
{
	vaddr_t va = pmap_map_direct(pg);
	boolean_t rv = TRUE;
	long *ptr;
	int i;

	/*
	 * XXX - We'd really like to do this uncached. But at this moment
	 * we're never called, so just pretend that this works.
	 * It shouldn't be too hard to create a second direct map
	 * with uncached mappings.
	 */
	for (i = 0, ptr = (long *) va; i < PAGE_SIZE / sizeof(long); i++) {
		if (!curcpu_is_idle()) {

			/*
			 * A process has become ready. Abort now,
			 * so we don't keep it waiting while we
			 * do slow memory access to finish this
			 * page.
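			 * returning FALSE below tells the caller that the
			 * page was not fully zeroed.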
1292 */ 1293 1294 rv = FALSE; 1295 break; 1296 } 1297 *ptr++ = 0; 1298 } 1299 1300 return (rv); 1301 } 1302 1303 /* 1304 * pmap_copy_page: copy a page 1305 */ 1306 1307 void 1308 pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg) 1309 { 1310 vaddr_t srcva = pmap_map_direct(srcpg); 1311 vaddr_t dstva = pmap_map_direct(dstpg); 1312 1313 memcpy((void *)dstva, (void *)srcva, PAGE_SIZE); 1314 } 1315 1316 /* 1317 * p m a p r e m o v e f u n c t i o n s 1318 * 1319 * functions that remove mappings 1320 */ 1321 1322 /* 1323 * pmap_remove_ptes: remove PTEs from a PTP 1324 * 1325 * => must have proper locking on pmap_master_lock 1326 * => PTP must be mapped into KVA 1327 * => PTP should be null if pmap == pmap_kernel() 1328 */ 1329 1330 void 1331 pmap_remove_ptes(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva, 1332 vaddr_t startva, vaddr_t endva, int flags) 1333 { 1334 struct pv_entry *pve; 1335 pt_entry_t *pte = (pt_entry_t *) ptpva; 1336 struct vm_page *pg; 1337 pt_entry_t opte; 1338 1339 /* 1340 * note that ptpva points to the PTE that maps startva. this may 1341 * or may not be the first PTE in the PTP. 1342 * 1343 * we loop through the PTP while there are still PTEs to look at 1344 * and the wire_count is greater than 1 (because we use the wire_count 1345 * to keep track of the number of real PTEs in the PTP). 1346 */ 1347 1348 for (/*null*/; startva < endva && (ptp == NULL || ptp->wire_count > 1) 1349 ; pte++, startva += PAGE_SIZE) { 1350 if (!pmap_valid_entry(*pte)) 1351 continue; /* VA not mapped */ 1352 if ((flags & PMAP_REMOVE_SKIPWIRED) && (*pte & PG_W)) { 1353 continue; 1354 } 1355 1356 /* atomically save the old PTE and zap! it */ 1357 opte = pmap_pte_set(pte, 0); 1358 1359 if (opte & PG_W) 1360 pmap->pm_stats.wired_count--; 1361 pmap->pm_stats.resident_count--; 1362 1363 if (ptp) 1364 ptp->wire_count--; /* dropping a PTE */ 1365 1366 pg = PHYS_TO_VM_PAGE(opte & PG_FRAME); 1367 1368 /* 1369 * if we are not on a pv list we are done. 1370 */ 1371 1372 if ((opte & PG_PVLIST) == 0) { 1373 #ifdef DIAGNOSTIC 1374 if (pg != NULL) 1375 panic("pmap_remove_ptes: managed page without " 1376 "PG_PVLIST for 0x%lx", startva); 1377 #endif 1378 continue; 1379 } 1380 1381 #ifdef DIAGNOSTIC 1382 if (pg == NULL) 1383 panic("pmap_remove_ptes: unmanaged page marked " 1384 "PG_PVLIST, va = 0x%lx, pa = 0x%lx", 1385 startva, (u_long)(opte & PG_FRAME)); 1386 #endif 1387 1388 /* sync R/M bits */ 1389 pmap_sync_flags_pte(pg, opte); 1390 pve = pmap_remove_pv(pg, pmap, startva); 1391 1392 if (pve) { 1393 pool_put(&pmap_pv_pool, pve); 1394 } 1395 1396 /* end of "for" loop: time for next pte */ 1397 } 1398 } 1399 1400 1401 /* 1402 * pmap_remove_pte: remove a single PTE from a PTP 1403 * 1404 * => must have proper locking on pmap_master_lock 1405 * => PTP must be mapped into KVA 1406 * => PTP should be null if pmap == pmap_kernel() 1407 * => returns true if we removed a mapping 1408 */ 1409 1410 boolean_t 1411 pmap_remove_pte(struct pmap *pmap, struct vm_page *ptp, pt_entry_t *pte, 1412 vaddr_t va, int flags) 1413 { 1414 struct pv_entry *pve; 1415 struct vm_page *pg; 1416 pt_entry_t opte; 1417 1418 if (!pmap_valid_entry(*pte)) 1419 return(FALSE); /* VA not mapped */ 1420 if ((flags & PMAP_REMOVE_SKIPWIRED) && (*pte & PG_W)) { 1421 return(FALSE); 1422 } 1423 1424 /* atomically save the old PTE and zap! 
it */ 1425 opte = pmap_pte_set(pte, 0); 1426 1427 if (opte & PG_W) 1428 pmap->pm_stats.wired_count--; 1429 pmap->pm_stats.resident_count--; 1430 1431 if (ptp) 1432 ptp->wire_count--; /* dropping a PTE */ 1433 1434 pg = PHYS_TO_VM_PAGE(opte & PG_FRAME); 1435 1436 /* 1437 * if we are not on a pv list we are done. 1438 */ 1439 if ((opte & PG_PVLIST) == 0) { 1440 #ifdef DIAGNOSTIC 1441 if (pg != NULL) 1442 panic("pmap_remove_pte: managed page without " 1443 "PG_PVLIST for 0x%lx", va); 1444 #endif 1445 return(TRUE); 1446 } 1447 1448 #ifdef DIAGNOSTIC 1449 if (pg == NULL) 1450 panic("pmap_remove_pte: unmanaged page marked " 1451 "PG_PVLIST, va = 0x%lx, pa = 0x%lx", va, 1452 (u_long)(opte & PG_FRAME)); 1453 #endif 1454 1455 /* sync R/M bits */ 1456 pmap_sync_flags_pte(pg, opte); 1457 pve = pmap_remove_pv(pg, pmap, va); 1458 if (pve) 1459 pool_put(&pmap_pv_pool, pve); 1460 return(TRUE); 1461 } 1462 1463 /* 1464 * pmap_remove: top level mapping removal function 1465 * 1466 * => caller should not be holding any pmap locks 1467 */ 1468 1469 void 1470 pmap_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva) 1471 { 1472 pmap_do_remove(pmap, sva, eva, PMAP_REMOVE_ALL); 1473 } 1474 1475 /* 1476 * pmap_do_remove: mapping removal guts 1477 * 1478 * => caller should not be holding any pmap locks 1479 */ 1480 1481 void 1482 pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags) 1483 { 1484 pt_entry_t *ptes; 1485 pd_entry_t **pdes, pde; 1486 boolean_t result; 1487 paddr_t ptppa; 1488 vaddr_t blkendva; 1489 struct vm_page *ptp; 1490 vaddr_t va; 1491 int shootall = 0; 1492 struct pg_to_free empty_ptps; 1493 1494 TAILQ_INIT(&empty_ptps); 1495 1496 PMAP_MAP_TO_HEAD_LOCK(); 1497 pmap_map_ptes(pmap, &ptes, &pdes); 1498 1499 /* 1500 * removing one page? take shortcut function. 1501 */ 1502 1503 if (sva + PAGE_SIZE == eva) { 1504 if (pmap_pdes_valid(sva, pdes, &pde)) { 1505 1506 /* PA of the PTP */ 1507 ptppa = pde & PG_FRAME; 1508 1509 /* get PTP if non-kernel mapping */ 1510 1511 if (pmap == pmap_kernel()) { 1512 /* we never free kernel PTPs */ 1513 ptp = NULL; 1514 } else { 1515 ptp = pmap_find_ptp(pmap, sva, ptppa, 1); 1516 #ifdef DIAGNOSTIC 1517 if (ptp == NULL) 1518 panic("pmap_remove: unmanaged " 1519 "PTP detected"); 1520 #endif 1521 } 1522 1523 /* do it! */ 1524 result = pmap_remove_pte(pmap, ptp, 1525 &ptes[pl1_i(sva)], sva, flags); 1526 1527 /* 1528 * if mapping removed and the PTP is no longer 1529 * being used, free it! 1530 */ 1531 1532 if (result && ptp && ptp->wire_count <= 1) 1533 pmap_free_ptp(pmap, ptp, sva, ptes, pdes, 1534 &empty_ptps); 1535 pmap_tlb_shootpage(pmap, sva); 1536 } 1537 1538 pmap_tlb_shootwait(); 1539 pmap_unmap_ptes(pmap); 1540 PMAP_MAP_TO_HEAD_UNLOCK(); 1541 1542 while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) { 1543 TAILQ_REMOVE(&empty_ptps, ptp, pageq); 1544 uvm_pagefree(ptp); 1545 } 1546 1547 return; 1548 } 1549 1550 if ((eva - sva > 32 * PAGE_SIZE) && pmap != pmap_kernel()) 1551 shootall = 1; 1552 1553 for (va = sva; va < eva; va = blkendva) { 1554 /* determine range of block */ 1555 blkendva = x86_round_pdr(va + 1); 1556 if (blkendva > eva) 1557 blkendva = eva; 1558 1559 /* 1560 * XXXCDC: our PTE mappings should never be removed 1561 * with pmap_remove! if we allow this (and why would 1562 * we?) then we end up freeing the pmap's page 1563 * directory page (PDP) before we are finished using 1564 * it when we hit in in the recursive mapping. this 1565 * is BAD. 1566 * 1567 * long term solution is to move the PTEs out of user 1568 * address space. 
and into kernel address space (up 1569 * with APTE). then we can set VM_MAXUSER_ADDRESS to 1570 * be VM_MAX_ADDRESS. 1571 */ 1572 1573 if (pl_i(va, PTP_LEVELS) == PDIR_SLOT_PTE) 1574 /* XXXCDC: ugly hack to avoid freeing PDP here */ 1575 continue; 1576 1577 if (!pmap_pdes_valid(va, pdes, &pde)) 1578 continue; 1579 1580 /* PA of the PTP */ 1581 ptppa = pde & PG_FRAME; 1582 1583 /* get PTP if non-kernel mapping */ 1584 if (pmap == pmap_kernel()) { 1585 /* we never free kernel PTPs */ 1586 ptp = NULL; 1587 } else { 1588 ptp = pmap_find_ptp(pmap, va, ptppa, 1); 1589 #ifdef DIAGNOSTIC 1590 if (ptp == NULL) 1591 panic("pmap_remove: unmanaged PTP " 1592 "detected"); 1593 #endif 1594 } 1595 pmap_remove_ptes(pmap, ptp, 1596 (vaddr_t)&ptes[pl1_i(va)], va, blkendva, flags); 1597 1598 /* if PTP is no longer being used, free it! */ 1599 if (ptp && ptp->wire_count <= 1) { 1600 pmap_free_ptp(pmap, ptp, va, ptes, pdes, &empty_ptps); 1601 } 1602 } 1603 1604 if (shootall) 1605 pmap_tlb_shoottlb(); 1606 else 1607 pmap_tlb_shootrange(pmap, sva, eva); 1608 1609 pmap_tlb_shootwait(); 1610 1611 pmap_unmap_ptes(pmap); 1612 PMAP_MAP_TO_HEAD_UNLOCK(); 1613 1614 while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) { 1615 TAILQ_REMOVE(&empty_ptps, ptp, pageq); 1616 uvm_pagefree(ptp); 1617 } 1618 } 1619 1620 /* 1621 * pmap_page_remove: remove a managed vm_page from all pmaps that map it 1622 * 1623 * => R/M bits are sync'd back to attrs 1624 */ 1625 1626 void 1627 pmap_page_remove(struct vm_page *pg) 1628 { 1629 struct pv_entry *pve; 1630 pt_entry_t *ptes, opte; 1631 pd_entry_t **pdes; 1632 #ifdef DIAGNOSTIC 1633 pd_entry_t pde; 1634 #endif 1635 struct pg_to_free empty_ptps; 1636 struct vm_page *ptp; 1637 1638 TAILQ_INIT(&empty_ptps); 1639 1640 PMAP_HEAD_TO_MAP_LOCK(); 1641 1642 while ((pve = pg->mdpage.pv_list) != NULL) { 1643 pg->mdpage.pv_list = pve->pv_next; 1644 1645 pmap_map_ptes(pve->pv_pmap, &ptes, &pdes); 1646 1647 #ifdef DIAGNOSTIC 1648 if (pve->pv_ptp && pmap_pdes_valid(pve->pv_va, pdes, &pde) && 1649 (pde & PG_FRAME) != VM_PAGE_TO_PHYS(pve->pv_ptp)) { 1650 printf("pmap_page_remove: pg=%p: va=%lx, pv_ptp=%p\n", 1651 pg, pve->pv_va, pve->pv_ptp); 1652 printf("pmap_page_remove: PTP's phys addr: " 1653 "actual=%lx, recorded=%lx\n", 1654 (unsigned long)(pde & PG_FRAME), 1655 VM_PAGE_TO_PHYS(pve->pv_ptp)); 1656 panic("pmap_page_remove: mapped managed page has " 1657 "invalid pv_ptp field"); 1658 } 1659 #endif 1660 1661 /* atomically save the old PTE and zap it */ 1662 opte = pmap_pte_set(&ptes[pl1_i(pve->pv_va)], 0); 1663 1664 if (opte & PG_W) 1665 pve->pv_pmap->pm_stats.wired_count--; 1666 pve->pv_pmap->pm_stats.resident_count--; 1667 1668 pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va); 1669 1670 pmap_sync_flags_pte(pg, opte); 1671 1672 /* update the PTP reference count. free if last reference. */ 1673 if (pve->pv_ptp) { 1674 pve->pv_ptp->wire_count--; 1675 if (pve->pv_ptp->wire_count <= 1) { 1676 pmap_free_ptp(pve->pv_pmap, pve->pv_ptp, 1677 pve->pv_va, ptes, pdes, &empty_ptps); 1678 } 1679 } 1680 pmap_unmap_ptes(pve->pv_pmap); 1681 pool_put(&pmap_pv_pool, pve); 1682 } 1683 1684 PMAP_HEAD_TO_MAP_UNLOCK(); 1685 pmap_tlb_shootwait(); 1686 1687 while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) { 1688 TAILQ_REMOVE(&empty_ptps, ptp, pageq); 1689 uvm_pagefree(ptp); 1690 } 1691 } 1692 1693 /* 1694 * p m a p a t t r i b u t e f u n c t i o n s 1695 * functions that test/change managed page's attributes 1696 * since a page can be mapped multiple times we must check each PTE that 1697 * maps it by going down the pv lists. 
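 * the hardware PG_U/PG_M bits are mirrored into pg_flags as PG_PMAP_REF
 * and PG_PMAP_MOD by pmap_sync_flags_pte(), so a flag that is already set
 * lets pmap_test_attrs() skip the pv list walk entirely.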
 */

/*
 * pmap_test_attrs: test a page's attributes
 */

boolean_t
pmap_test_attrs(struct vm_page *pg, unsigned int testbits)
{
	struct pv_entry *pve;
	pt_entry_t *ptes, pte;
	pd_entry_t **pdes;
	u_long mybits, testflags;

	testflags = pmap_pte2flags(testbits);

	if (pg->pg_flags & testflags)
		return (TRUE);

	PMAP_HEAD_TO_MAP_LOCK();
	mybits = 0;
	for (pve = pg->mdpage.pv_list; pve != NULL && mybits == 0;
	    pve = pve->pv_next) {
		pmap_map_ptes(pve->pv_pmap, &ptes, &pdes);
		pte = ptes[pl1_i(pve->pv_va)];
		pmap_unmap_ptes(pve->pv_pmap);
		mybits |= (pte & testbits);
	}
	PMAP_HEAD_TO_MAP_UNLOCK();

	if (mybits == 0)
		return (FALSE);

	atomic_setbits_int(&pg->pg_flags, pmap_pte2flags(mybits));

	return (TRUE);
}

/*
 * pmap_clear_attrs: change a page's attributes
 *
 * => we return TRUE if we cleared one of the bits we were asked to
 */

boolean_t
pmap_clear_attrs(struct vm_page *pg, unsigned long clearbits)
{
	struct pv_entry *pve;
	pt_entry_t *ptes, opte;
	pd_entry_t **pdes;
	u_long clearflags;
	int result;

	clearflags = pmap_pte2flags(clearbits);

	PMAP_HEAD_TO_MAP_LOCK();

	result = pg->pg_flags & clearflags;
	if (result)
		atomic_clearbits_int(&pg->pg_flags, clearflags);

	for (pve = pg->mdpage.pv_list; pve != NULL; pve = pve->pv_next) {
		pmap_map_ptes(pve->pv_pmap, &ptes, &pdes);
#ifdef DIAGNOSTIC
		if (!pmap_pdes_valid(pve->pv_va, pdes, NULL))
			panic("pmap_clear_attrs: mapping without PTP "
			    "detected");
#endif

		opte = ptes[pl1_i(pve->pv_va)];
		if (opte & clearbits) {
			result = 1;
			pmap_pte_clearbits(&ptes[pl1_i(pve->pv_va)],
			    (opte & clearbits));
			pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va);
		}
		pmap_unmap_ptes(pve->pv_pmap);
	}

	PMAP_HEAD_TO_MAP_UNLOCK();

	pmap_tlb_shootwait();

	return (result != 0);
}

/*
 * p m a p   p r o t e c t i o n   f u n c t i o n s
 */

/*
 * pmap_page_protect: change the protection of all recorded mappings
 *	of a managed page
 *
 * => NOTE: this is an inline function in pmap.h
 */

/* see pmap.h */

/*
 * pmap_protect: set the protection of the pages in a pmap
 *
 * => NOTE: this is an inline function in pmap.h
 */

/* see pmap.h */

/*
 * pmap_write_protect: write-protect pages in a pmap
 */

void
pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
{
	pt_entry_t nx, opte, *ptes, *spte, *epte;
	pd_entry_t **pdes;
	vaddr_t blockend;
	int shootall = 0;
	vaddr_t va;

	pmap_map_ptes(pmap, &ptes, &pdes);

	/* should be ok, but just in case ... */
	sva &= PG_FRAME;
	eva &= PG_FRAME;

	nx = 0;
	if ((cpu_feature & CPUID_NXE) && !(prot & VM_PROT_EXECUTE))
		nx = PG_NX;

	if ((eva - sva > 32 * PAGE_SIZE) && pmap != pmap_kernel())
		shootall = 1;

	for (va = sva; va < eva ; va = blockend) {
		blockend = (va & L2_FRAME) + NBPD_L2;
		if (blockend > eva)
			blockend = eva;

		/*
		 * XXXCDC: our PTE mappings should never be write-protected!
		 *
		 * long term solution is to move the PTEs out of user
		 * address space. and into kernel address space (up
		 * with APTE).
then we can set VM_MAXUSER_ADDRESS to 1842 * be VM_MAX_ADDRESS. 1843 */ 1844 1845 /* XXXCDC: ugly hack to avoid freeing PDP here */ 1846 if (pl_i(va, PTP_LEVELS) == PDIR_SLOT_PTE) 1847 continue; 1848 1849 /* empty block? */ 1850 if (!pmap_pdes_valid(va, pdes, NULL)) 1851 continue; 1852 1853 #ifdef DIAGNOSTIC 1854 if (va >= VM_MAXUSER_ADDRESS && va < VM_MAX_ADDRESS) 1855 panic("pmap_write_protect: PTE space"); 1856 #endif 1857 1858 spte = &ptes[pl1_i(va)]; 1859 epte = &ptes[pl1_i(blockend)]; 1860 1861 for (/*null */; spte < epte ; spte++) { 1862 if (!(*spte & PG_V)) 1863 continue; 1864 opte = *spte; 1865 pmap_pte_clearbits(spte, PG_RW); 1866 pmap_pte_setbits(spte, nx); 1867 } 1868 } 1869 1870 if (shootall) 1871 pmap_tlb_shoottlb(); 1872 else 1873 pmap_tlb_shootrange(pmap, sva, eva); 1874 1875 pmap_tlb_shootwait(); 1876 1877 pmap_unmap_ptes(pmap); 1878 } 1879 1880 /* 1881 * end of protection functions 1882 */ 1883 1884 /* 1885 * pmap_unwire: clear the wired bit in the PTE 1886 * 1887 * => mapping should already be in map 1888 */ 1889 1890 void 1891 pmap_unwire(struct pmap *pmap, vaddr_t va) 1892 { 1893 pt_entry_t *ptes; 1894 pd_entry_t **pdes; 1895 1896 pmap_map_ptes(pmap, &ptes, &pdes); 1897 1898 if (pmap_pdes_valid(va, pdes, NULL)) { 1899 1900 #ifdef DIAGNOSTIC 1901 if (!pmap_valid_entry(ptes[pl1_i(va)])) 1902 panic("pmap_unwire: invalid (unmapped) va 0x%lx", va); 1903 #endif 1904 if ((ptes[pl1_i(va)] & PG_W) != 0) { 1905 pmap_pte_clearbits(&ptes[pl1_i(va)], PG_W); 1906 pmap->pm_stats.wired_count--; 1907 } 1908 #ifdef DIAGNOSTIC 1909 else { 1910 printf("pmap_unwire: wiring for pmap %p va 0x%lx " 1911 "didn't change!\n", pmap, va); 1912 } 1913 #endif 1914 pmap_unmap_ptes(pmap); 1915 } 1916 #ifdef DIAGNOSTIC 1917 else { 1918 panic("pmap_unwire: invalid PDE"); 1919 } 1920 #endif 1921 } 1922 1923 /* 1924 * pmap_collect: free resources held by a pmap 1925 * 1926 * => optional function. 1927 * => called when a process is swapped out to free memory. 1928 */ 1929 1930 void 1931 pmap_collect(struct pmap *pmap) 1932 { 1933 /* 1934 * free all of the pt pages by removing the physical mappings 1935 * for its entire address space. 1936 */ 1937 1938 /* pmap_do_remove(pmap, VM_MIN_ADDRESS, VM_MAX_ADDRESS, 1939 PMAP_REMOVE_SKIPWIRED); 1940 */ 1941 } 1942 1943 /* 1944 * pmap_copy: copy mappings from one pmap to another 1945 * 1946 * => optional function 1947 * void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) 1948 */ 1949 1950 /* 1951 * defined as macro in pmap.h 1952 */ 1953 1954 /* 1955 * pmap_enter: enter a mapping into a pmap 1956 * 1957 * => must be done "now" ... 
no lazy-evaluation 1958 */ 1959 1960 int 1961 pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags) 1962 { 1963 pt_entry_t *ptes, opte, npte; 1964 pd_entry_t **pdes; 1965 struct vm_page *ptp, *pg = NULL; 1966 struct pv_entry *pve = NULL; 1967 int ptpdelta, wireddelta, resdelta; 1968 boolean_t wired = (flags & PMAP_WIRED) != 0; 1969 boolean_t nocache = (pa & PMAP_NOCACHE) != 0; 1970 int error; 1971 1972 pa &= PMAP_PA_MASK; 1973 1974 #ifdef DIAGNOSTIC 1975 if (va == (vaddr_t) PDP_BASE || va == (vaddr_t) APDP_BASE) 1976 panic("pmap_enter: trying to map over PDP/APDP!"); 1977 1978 /* sanity check: kernel PTPs should already have been pre-allocated */ 1979 if (va >= VM_MIN_KERNEL_ADDRESS && 1980 !pmap_valid_entry(pmap->pm_pdir[pl_i(va, PTP_LEVELS)])) 1981 panic("pmap_enter: missing kernel PTP for va %lx!", va); 1982 1983 #endif 1984 1985 /* get lock */ 1986 PMAP_MAP_TO_HEAD_LOCK(); 1987 1988 /* 1989 * map in ptes and get a pointer to our PTP (unless we are the kernel) 1990 */ 1991 1992 pmap_map_ptes(pmap, &ptes, &pdes); 1993 if (pmap == pmap_kernel()) { 1994 ptp = NULL; 1995 } else { 1996 ptp = pmap_get_ptp(pmap, va, pdes); 1997 if (ptp == NULL) { 1998 if (flags & PMAP_CANFAIL) { 1999 error = ENOMEM; 2000 goto out; 2001 } 2002 panic("pmap_enter: get ptp failed"); 2003 } 2004 } 2005 opte = ptes[pl1_i(va)]; /* old PTE */ 2006 2007 /* 2008 * is there currently a valid mapping at our VA? 2009 */ 2010 2011 if (pmap_valid_entry(opte)) { 2012 /* 2013 * first, calculate pm_stats updates. resident count will not 2014 * change since we are replacing/changing a valid mapping. 2015 * wired count might change... 2016 */ 2017 2018 resdelta = 0; 2019 if (wired && (opte & PG_W) == 0) 2020 wireddelta = 1; 2021 else if (!wired && (opte & PG_W) != 0) 2022 wireddelta = -1; 2023 else 2024 wireddelta = 0; 2025 ptpdelta = 0; 2026 2027 /* 2028 * is the currently mapped PA the same as the one we 2029 * want to map? 2030 */ 2031 2032 if ((opte & PG_FRAME) == pa) { 2033 2034 /* if this is on the PVLIST, sync R/M bit */ 2035 if (opte & PG_PVLIST) { 2036 pg = PHYS_TO_VM_PAGE(pa); 2037 #ifdef DIAGNOSTIC 2038 if (pg == NULL) 2039 panic("pmap_enter: same pa PG_PVLIST " 2040 "mapping with unmanaged page " 2041 "pa = 0x%lx (0x%lx)", pa, 2042 atop(pa)); 2043 #endif 2044 pmap_sync_flags_pte(pg, opte); 2045 } else { 2046 #ifdef DIAGNOSTIC 2047 if (PHYS_TO_VM_PAGE(pa) != NULL) 2048 panic("pmap_enter: same pa, managed " 2049 "page, no PG_VLIST pa: 0x%lx\n", 2050 pa); 2051 #endif 2052 } 2053 goto enter_now; 2054 } 2055 2056 /* 2057 * changing PAs: we must remove the old one first 2058 */ 2059 2060 /* 2061 * if current mapping is on a pvlist, 2062 * remove it (sync R/M bits) 2063 */ 2064 2065 if (opte & PG_PVLIST) { 2066 pg = PHYS_TO_VM_PAGE(opte & PG_FRAME); 2067 #ifdef DIAGNOSTIC 2068 if (pg == NULL) 2069 panic("pmap_enter: PG_PVLIST mapping with " 2070 "unmanaged page " 2071 "pa = 0x%lx (0x%lx)", pa, atop(pa)); 2072 #endif 2073 pmap_sync_flags_pte(pg, opte); 2074 pve = pmap_remove_pv(pg, pmap, va); 2075 pg = NULL; /* This is not the page we are looking for */ 2076 } 2077 } else { /* opte not valid */ 2078 pve = NULL; 2079 resdelta = 1; 2080 if (wired) 2081 wireddelta = 1; 2082 else 2083 wireddelta = 0; 2084 if (ptp) 2085 ptpdelta = 1; 2086 else 2087 ptpdelta = 0; 2088 } 2089 2090 /* 2091 * pve is either NULL or points to a now-free pv_entry structure 2092 * (the latter case is if we called pmap_remove_pv above). 2093 * 2094 * if this entry is to be on a pvlist, enter it now. 
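	 * (pg ends up non-NULL exactly when the new PTE should get
	 * PG_PVLIST; the enter_now code below relies on that.)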
	 */

	if (pmap_initialized)
		pg = PHYS_TO_VM_PAGE(pa);

	if (pg != NULL) {
		if (pve == NULL) {
			pve = pool_get(&pmap_pv_pool, PR_NOWAIT);
			if (pve == NULL) {
				if (flags & PMAP_CANFAIL) {
					error = ENOMEM;
					goto out;
				}
				panic("pmap_enter: no pv entries available");
			}
		}
		pmap_enter_pv(pg, pve, pmap, va, ptp);
	} else {
		/* new mapping is not PG_PVLIST.  free pve if we've got one */
		if (pve)
			pool_put(&pmap_pv_pool, pve);
	}

enter_now:
	/*
	 * at this point pg is !NULL if we want the PG_PVLIST bit set
	 */

	pmap->pm_stats.resident_count += resdelta;
	pmap->pm_stats.wired_count += wireddelta;
	if (ptp)
		ptp->wire_count += ptpdelta;

	if (pg != PHYS_TO_VM_PAGE(pa))
		panic("wtf?");

	npte = pa | protection_codes[prot] | PG_V;
	if (pg != NULL)
		npte |= PG_PVLIST;
	if (wired)
		npte |= PG_W;
	if (nocache)
		npte |= PG_N;
	if (va < VM_MAXUSER_ADDRESS)
		npte |= PG_u;
	else if (va < VM_MAX_ADDRESS)
		npte |= (PG_u | PG_RW);	/* XXXCDC: no longer needed? */
	if (pmap == pmap_kernel())
		npte |= pmap_pg_g;

	ptes[pl1_i(va)] = npte;		/* zap! */

	/*
	 * If we changed anything other than modified/used bits,
	 * flush the TLB.  (is this overkill?)
	 */
	if (opte & PG_V) {
		if (nocache && (opte & PG_N) == 0)
			wbinvd();
		pmap_tlb_shootpage(pmap, va);
		pmap_tlb_shootwait();
	}

	error = 0;

out:
	pmap_unmap_ptes(pmap);
	PMAP_MAP_TO_HEAD_UNLOCK();

	return error;
}

boolean_t
pmap_get_physpage(vaddr_t va, int level, paddr_t *paddrp)
{
	struct vm_page *ptp;
	struct pmap *kpm = pmap_kernel();

	if (uvm.page_init_done == FALSE) {
		vaddr_t va;

		/*
		 * we're growing the kernel pmap early (from
		 * uvm_pageboot_alloc()).  this case must be
		 * handled a little differently.
		 */

		va = pmap_steal_memory(PAGE_SIZE, NULL, NULL);
		*paddrp = PMAP_DIRECT_UNMAP(va);
	} else {
		ptp = uvm_pagealloc(&kpm->pm_obj[level - 1],
		    ptp_va2o(va, level), NULL,
		    UVM_PGA_USERESERVE|UVM_PGA_ZERO);
		if (ptp == NULL)
			panic("pmap_get_physpage: out of memory");
		atomic_clearbits_int(&ptp->pg_flags, PG_BUSY);
		ptp->wire_count = 1;
		*paddrp = VM_PAGE_TO_PHYS(ptp);
	}
	kpm->pm_stats.resident_count++;
	return TRUE;
}

/*
 * Allocate the specified number of PTPs for a PTP level, and populate
 * all levels below accordingly, mapping virtual addresses starting at
 * kva.
 *
 * Used by pmap_growkernel.
 */
void
pmap_alloc_level(pd_entry_t **pdes, vaddr_t kva, int lvl, long *needed_ptps)
{
	unsigned long i;
	vaddr_t va;
	paddr_t pa;
	unsigned long index, endindex;
	int level;
	pd_entry_t *pdep;

	for (level = lvl; level > 1; level--) {
		if (level == PTP_LEVELS)
			pdep = pmap_kernel()->pm_pdir;
		else
			pdep = pdes[level - 2];
		va = kva;
		index = pl_i(kva, level);
		endindex = index + needed_ptps[level - 1];
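		/*
		 * pl_i(kva, level) is kva's index within the page-table
		 * page at this level; on x86-64 with 4KB pages each level
		 * resolves 9 bits of the address (level 2 uses bits 21-29,
		 * level 3 bits 30-38, level 4 bits 39-47).  Each entry
		 * written below therefore maps nbpd[level - 1] bytes of
		 * KVA: 2MB at level 2, 1GB at level 3, 512GB at level 4.
		 */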
		/*
		 * XXX special case for first time call.
		 */
		if (nkptp[level - 1] != 0)
			index++;
		else
			endindex--;

		for (i = index; i <= endindex; i++) {
			pmap_get_physpage(va, level - 1, &pa);
			pdep[i] = pa | PG_RW | PG_V;
			nkptp[level - 1]++;
			va += nbpd[level - 1];
		}
	}
}

/*
 * pmap_growkernel: increase usage of KVM space
 *
 * => we allocate new PTPs for the kernel and install them in all
 *    the pmaps on the system.
 */

static vaddr_t pmap_maxkvaddr = VM_MIN_KERNEL_ADDRESS;

vaddr_t
pmap_growkernel(vaddr_t maxkvaddr)
{
	struct pmap *kpm = pmap_kernel(), *pm;
	int s, i;
	unsigned newpdes;
	long needed_kptp[PTP_LEVELS], target_nptp, old;

	if (maxkvaddr <= pmap_maxkvaddr)
		return pmap_maxkvaddr;

	maxkvaddr = x86_round_pdr(maxkvaddr);
	old = nkptp[PTP_LEVELS - 1];
	/*
	 * This loop could be optimized more, but pmap_growkernel()
	 * is called infrequently.
	 */
	for (i = PTP_LEVELS - 1; i >= 1; i--) {
		target_nptp = pl_i(maxkvaddr, i + 1) -
		    pl_i(VM_MIN_KERNEL_ADDRESS, i + 1);
		/*
		 * XXX only need to check toplevel.
		 */
		if (target_nptp > nkptpmax[i])
			panic("out of KVA space");
		needed_kptp[i] = target_nptp - nkptp[i] + 1;
	}

	s = splhigh();	/* to be safe */
	pmap_alloc_level(normal_pdes, pmap_maxkvaddr, PTP_LEVELS,
	    needed_kptp);

	/*
	 * If the number of top level entries changed, update all
	 * pmaps.
	 */
	if (needed_kptp[PTP_LEVELS - 1] != 0) {
		newpdes = nkptp[PTP_LEVELS - 1] - old;
		LIST_FOREACH(pm, &pmaps, pm_list) {
			memcpy(&pm->pm_pdir[PDIR_SLOT_KERN + old],
			    &kpm->pm_pdir[PDIR_SLOT_KERN + old],
			    newpdes * sizeof (pd_entry_t));
		}

		/* Invalidate the PDP cache. */
#if 0
		pool_cache_invalidate(&pmap_pdp_cache);
#endif
		pmap_pdp_cache_generation++;
	}
	pmap_maxkvaddr = maxkvaddr;
	splx(s);

	return maxkvaddr;
}

vaddr_t
pmap_steal_memory(vsize_t size, vaddr_t *start, vaddr_t *end)
{
	int segno;
	u_int npg;
	vaddr_t va;
	paddr_t pa;
	struct vm_physseg *seg;

	size = round_page(size);
	npg = atop(size);

	for (segno = 0, seg = vm_physmem; segno < vm_nphysseg; segno++, seg++) {
		if (seg->avail_end - seg->avail_start < npg)
			continue;
		/*
		 * We can only steal at an ``unused'' segment boundary,
		 * i.e. either at the start or at the end.
		 */
		if (seg->avail_start == seg->start ||
		    seg->avail_end == seg->end)
			break;
	}
	if (segno == vm_nphysseg) {
		panic("pmap_steal_memory: out of memory");
	} else {
		if (seg->avail_start == seg->start) {
			pa = ptoa(seg->avail_start);
			seg->avail_start += npg;
			seg->start += npg;
		} else {
			pa = ptoa(seg->avail_end) - size;
			seg->avail_end -= npg;
			seg->end -= npg;
		}
		/*
		 * If the whole segment has now been consumed, remove it.
		 * Note that the crash dump code still knows about it
		 * and will dump it correctly.
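		 * The segments above the removed one are copied down one
		 * slot so that the vm_physmem array stays densely packed.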
		 */
		if (seg->start == seg->end) {
			if (vm_nphysseg-- == 1)
				panic("pmap_steal_memory: out of memory");
			while (segno < vm_nphysseg) {
				seg[0] = seg[1];	/* struct copy */
				seg++;
				segno++;
			}
		}

		va = PMAP_DIRECT_MAP(pa);
		memset((void *)va, 0, size);
	}

	if (start != NULL)
		*start = virtual_avail;
	if (end != NULL)
		*end = VM_MAX_KERNEL_ADDRESS;

	return (va);
}

#ifdef DEBUG
void pmap_dump(struct pmap *, vaddr_t, vaddr_t);

/*
 * pmap_dump: dump all the mappings from a pmap
 *
 * => caller should not be holding any pmap locks
 */

void
pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
{
	pt_entry_t *ptes, *pte;
	pd_entry_t **pdes;
	vaddr_t blkendva;

	/*
	 * if end is out of range or does not follow start, dump up to
	 * VM_MAXUSER_ADDRESS instead.
	 */

	if (eva > VM_MAXUSER_ADDRESS || eva <= sva)
		eva = VM_MAXUSER_ADDRESS;

	PMAP_MAP_TO_HEAD_LOCK();
	pmap_map_ptes(pmap, &ptes, &pdes);

	/*
	 * dumping a range of pages: we dump in PTP-sized blocks
	 */

	for (/* null */ ; sva < eva ; sva = blkendva) {

		/* determine range of block */
		blkendva = x86_round_pdr(sva+1);
		if (blkendva > eva)
			blkendva = eva;

		/* valid block? */
		if (!pmap_pdes_valid(sva, pdes, NULL))
			continue;

		pte = &ptes[pl1_i(sva)];
		for (/* null */; sva < blkendva ; sva += PAGE_SIZE, pte++) {
			if (!pmap_valid_entry(*pte))
				continue;
			printf("va %#lx -> pa %#lx (pte=%#lx)\n",
			    sva, *pte & PG_FRAME, *pte);
		}
	}
	pmap_unmap_ptes(pmap);
	PMAP_MAP_TO_HEAD_UNLOCK();
}
#endif

void
pmap_virtual_space(vaddr_t *vstartp, vaddr_t *vendp)
{
	*vstartp = virtual_avail;
	*vendp = VM_MAX_KERNEL_ADDRESS;
}

#ifdef MULTIPROCESSOR
/*
 * Locking for tlb shootdown.
 *
 * We lock by setting tlb_shoot_wait to the number of cpus that will
 * receive our tlb shootdown.  After sending the IPIs, we don't need to
 * worry about locking order or interrupts spinning for the lock because
 * the call that grabs the "lock" isn't the one that releases it.  And
 * there is nothing that can block the IPI that releases the lock.
 *
 * The functions are organized so that we first count the number of
 * cpus we need to send the IPI to, then we grab the counter, then
 * we send the IPIs, then we finally do our own shootdown.
 *
 * Our shootdown is last to make it parallel with the other cpus
 * to shorten the spin time.
 *
 * Notice that we depend on failures to send IPIs only being able to
 * happen during boot.  If they happen later, the above assumption
 * doesn't hold since we can end up in situations where no one will
 * release the lock if we get an interrupt at a bad moment.
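 *
 * (The counter is drained by the remote cpus: each target cpu is
 * expected to decrement tlb_shoot_wait from its IPI handler once it
 * has performed the requested flush, and that is what eventually
 * releases the "lock" taken here.)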
 */

volatile long tlb_shoot_wait;

volatile vaddr_t tlb_shoot_addr1;
volatile vaddr_t tlb_shoot_addr2;

void
pmap_tlb_shootpage(struct pmap *pm, vaddr_t va)
{
	struct cpu_info *ci, *self = curcpu();
	CPU_INFO_ITERATOR cii;
	long wait = 0;
	int mask = 0;

	CPU_INFO_FOREACH(cii, ci) {
		if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) ||
		    !(ci->ci_flags & CPUF_RUNNING))
			continue;
		mask |= 1 << ci->ci_cpuid;
		wait++;
	}

	if (wait > 0) {
		int s = splvm();

		while (x86_atomic_cas_ul(&tlb_shoot_wait, 0, wait) != 0) {
			while (tlb_shoot_wait != 0)
				SPINLOCK_SPIN_HOOK;
		}
		tlb_shoot_addr1 = va;
		CPU_INFO_FOREACH(cii, ci) {
			if ((mask & 1 << ci->ci_cpuid) == 0)
				continue;
			if (x86_fast_ipi(ci, LAPIC_IPI_INVLPG) != 0)
				panic("pmap_tlb_shootpage: ipi failed");
		}
		splx(s);
	}

	if (pmap_is_curpmap(pm))
		pmap_update_pg(va);
}

void
pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva)
{
	struct cpu_info *ci, *self = curcpu();
	CPU_INFO_ITERATOR cii;
	long wait = 0;
	int mask = 0;
	vaddr_t va;

	CPU_INFO_FOREACH(cii, ci) {
		if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) ||
		    !(ci->ci_flags & CPUF_RUNNING))
			continue;
		mask |= 1 << ci->ci_cpuid;
		wait++;
	}

	if (wait > 0) {
		int s = splvm();

		while (x86_atomic_cas_ul(&tlb_shoot_wait, 0, wait) != 0) {
			while (tlb_shoot_wait != 0)
				SPINLOCK_SPIN_HOOK;
		}
		tlb_shoot_addr1 = sva;
		tlb_shoot_addr2 = eva;
		CPU_INFO_FOREACH(cii, ci) {
			if ((mask & 1 << ci->ci_cpuid) == 0)
				continue;
			if (x86_fast_ipi(ci, LAPIC_IPI_INVLRANGE) != 0)
				panic("pmap_tlb_shootrange: ipi failed");
		}
		splx(s);
	}

	if (pmap_is_curpmap(pm))
		for (va = sva; va < eva; va += PAGE_SIZE)
			pmap_update_pg(va);
}

void
pmap_tlb_shoottlb(void)
{
	struct cpu_info *ci, *self = curcpu();
	CPU_INFO_ITERATOR cii;
	long wait = 0;
	int mask = 0;

	CPU_INFO_FOREACH(cii, ci) {
		if (ci == self || !(ci->ci_flags & CPUF_RUNNING))
			continue;
		mask |= 1 << ci->ci_cpuid;
		wait++;
	}

	if (wait) {
		int s = splvm();

		while (x86_atomic_cas_ul(&tlb_shoot_wait, 0, wait) != 0) {
			while (tlb_shoot_wait != 0)
				SPINLOCK_SPIN_HOOK;
		}

		CPU_INFO_FOREACH(cii, ci) {
			if ((mask & 1 << ci->ci_cpuid) == 0)
				continue;
			if (x86_fast_ipi(ci, LAPIC_IPI_INVLTLB) != 0)
				panic("pmap_tlb_shoottlb: ipi failed");
		}
		splx(s);
	}

	tlbflush();
}

void
pmap_tlb_shootwait(void)
{
	while (tlb_shoot_wait != 0)
		SPINLOCK_SPIN_HOOK;
}

#else

void
pmap_tlb_shootpage(struct pmap *pm, vaddr_t va)
{
	if (pmap_is_curpmap(pm))
		pmap_update_pg(va);
}

void
pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva)
{
	vaddr_t va;

	for (va = sva; va < eva; va += PAGE_SIZE)
		pmap_update_pg(va);
}

void
pmap_tlb_shoottlb(void)
{
	tlbflush();
}
#endif /* MULTIPROCESSOR */