1 /* From: $NetBSD: pmap.c,v 1.148 2004/04/03 04:35:48 bsh Exp $ */ 2 /*- 3 * Copyright 2011 Semihalf 4 * Copyright 2004 Olivier Houchard. 5 * Copyright 2003 Wasabi Systems, Inc. 6 * All rights reserved. 7 * 8 * Written by Steve C. Woodford for Wasabi Systems, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed for the NetBSD Project by 21 * Wasabi Systems, Inc. 22 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 23 * or promote products derived from this software without specific prior 24 * written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 * 38 * From: FreeBSD: src/sys/arm/arm/pmap.c,v 1.113 2009/07/24 13:50:29 39 */ 40 41 /*- 42 * Copyright (c) 2002-2003 Wasabi Systems, Inc. 43 * Copyright (c) 2001 Richard Earnshaw 44 * Copyright (c) 2001-2002 Christopher Gilbert 45 * All rights reserved. 46 * 47 * 1. Redistributions of source code must retain the above copyright 48 * notice, this list of conditions and the following disclaimer. 49 * 2. Redistributions in binary form must reproduce the above copyright 50 * notice, this list of conditions and the following disclaimer in the 51 * documentation and/or other materials provided with the distribution. 52 * 3. The name of the company nor the name of the author may be used to 53 * endorse or promote products derived from this software without specific 54 * prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED 57 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 58 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 59 * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 60 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 61 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 62 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 64 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 65 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 66 * SUCH DAMAGE. 
67 */ 68 /*- 69 * Copyright (c) 1999 The NetBSD Foundation, Inc. 70 * All rights reserved. 71 * 72 * This code is derived from software contributed to The NetBSD Foundation 73 * by Charles M. Hannum. 74 * 75 * Redistribution and use in source and binary forms, with or without 76 * modification, are permitted provided that the following conditions 77 * are met: 78 * 1. Redistributions of source code must retain the above copyright 79 * notice, this list of conditions and the following disclaimer. 80 * 2. Redistributions in binary form must reproduce the above copyright 81 * notice, this list of conditions and the following disclaimer in the 82 * documentation and/or other materials provided with the distribution. 83 * 84 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 85 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 86 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 87 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 88 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 89 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 90 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 91 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 92 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 93 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 94 * POSSIBILITY OF SUCH DAMAGE. 95 */ 96 97 /*- 98 * Copyright (c) 1994-1998 Mark Brinicombe. 99 * Copyright (c) 1994 Brini. 100 * All rights reserved. 101 * 102 * This code is derived from software written for Brini by Mark Brinicombe 103 * 104 * Redistribution and use in source and binary forms, with or without 105 * modification, are permitted provided that the following conditions 106 * are met: 107 * 1. Redistributions of source code must retain the above copyright 108 * notice, this list of conditions and the following disclaimer. 109 * 2. Redistributions in binary form must reproduce the above copyright 110 * notice, this list of conditions and the following disclaimer in the 111 * documentation and/or other materials provided with the distribution. 112 * 3. All advertising materials mentioning features or use of this software 113 * must display the following acknowledgement: 114 * This product includes software developed by Mark Brinicombe. 115 * 4. The name of the author may not be used to endorse or promote products 116 * derived from this software without specific prior written permission. 117 * 118 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 119 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 120 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
121 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 122 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 123 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 124 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 125 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 126 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 127 * 128 * RiscBSD kernel project 129 * 130 * pmap.c 131 * 132 * Machine dependent vm stuff 133 * 134 * Created : 20/09/94 135 */ 136 137 /* 138 * Special compilation symbols 139 * PMAP_DEBUG - Build in pmap_debug_level code 140 * 141 * Note that pmap_mapdev() and pmap_unmapdev() are implemented in arm/devmap.c 142 */ 143 /* Include header files */ 144 145 #include "opt_vm.h" 146 #include "opt_pmap.h" 147 148 #include <sys/cdefs.h> 149 __FBSDID("$FreeBSD$"); 150 #include <sys/param.h> 151 #include <sys/systm.h> 152 #include <sys/kernel.h> 153 #include <sys/ktr.h> 154 #include <sys/lock.h> 155 #include <sys/proc.h> 156 #include <sys/malloc.h> 157 #include <sys/msgbuf.h> 158 #include <sys/mutex.h> 159 #include <sys/vmmeter.h> 160 #include <sys/mman.h> 161 #include <sys/rwlock.h> 162 #include <sys/smp.h> 163 #include <sys/sched.h> 164 #include <sys/sysctl.h> 165 166 #include <vm/vm.h> 167 #include <vm/vm_param.h> 168 #include <vm/uma.h> 169 #include <vm/pmap.h> 170 #include <vm/vm_kern.h> 171 #include <vm/vm_object.h> 172 #include <vm/vm_map.h> 173 #include <vm/vm_page.h> 174 #include <vm/vm_pageout.h> 175 #include <vm/vm_phys.h> 176 #include <vm/vm_extern.h> 177 #include <vm/vm_reserv.h> 178 179 #include <machine/md_var.h> 180 #include <machine/cpu.h> 181 #include <machine/cpufunc.h> 182 #include <machine/pcb.h> 183 184 #ifdef DEBUG 185 extern int last_fault_code; 186 #endif 187 188 #ifdef PMAP_DEBUG 189 #define PDEBUG(_lev_,_stat_) \ 190 if (pmap_debug_level >= (_lev_)) \ 191 ((_stat_)) 192 #define dprintf printf 193 194 int pmap_debug_level = 0; 195 #define PMAP_INLINE 196 #else /* PMAP_DEBUG */ 197 #define PDEBUG(_lev_,_stat_) /* Nothing */ 198 #define dprintf(x, arg...) 
199 #define PMAP_INLINE __inline 200 #endif /* PMAP_DEBUG */ 201 202 #ifdef PV_STATS 203 #define PV_STAT(x) do { x ; } while (0) 204 #else 205 #define PV_STAT(x) do { } while (0) 206 #endif 207 208 #define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) 209 210 #ifdef ARM_L2_PIPT 211 #define pmap_l2cache_wbinv_range(va, pa, size) cpu_l2cache_wbinv_range((pa), (size)) 212 #define pmap_l2cache_inv_range(va, pa, size) cpu_l2cache_inv_range((pa), (size)) 213 #else 214 #define pmap_l2cache_wbinv_range(va, pa, size) cpu_l2cache_wbinv_range((va), (size)) 215 #define pmap_l2cache_inv_range(va, pa, size) cpu_l2cache_inv_range((va), (size)) 216 #endif 217 218 extern struct pv_addr systempage; 219 220 /* 221 * Internal function prototypes 222 */ 223 224 static PMAP_INLINE 225 struct pv_entry *pmap_find_pv(struct md_page *, pmap_t, vm_offset_t); 226 static void pmap_free_pv_chunk(struct pv_chunk *pc); 227 static void pmap_free_pv_entry(pmap_t pmap, pv_entry_t pv); 228 static pv_entry_t pmap_get_pv_entry(pmap_t pmap, boolean_t try); 229 static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap); 230 static boolean_t pmap_pv_insert_section(pmap_t, vm_offset_t, 231 vm_paddr_t); 232 static struct pv_entry *pmap_remove_pv(struct vm_page *, pmap_t, vm_offset_t); 233 static int pmap_pvh_wired_mappings(struct md_page *, int); 234 235 static int pmap_enter_locked(pmap_t, vm_offset_t, vm_page_t, 236 vm_prot_t, u_int); 237 static vm_paddr_t pmap_extract_locked(pmap_t pmap, vm_offset_t va); 238 static void pmap_alloc_l1(pmap_t); 239 static void pmap_free_l1(pmap_t); 240 241 static void pmap_map_section(pmap_t, vm_offset_t, vm_offset_t, 242 vm_prot_t, boolean_t); 243 static void pmap_promote_section(pmap_t, vm_offset_t); 244 static boolean_t pmap_demote_section(pmap_t, vm_offset_t); 245 static boolean_t pmap_enter_section(pmap_t, vm_offset_t, vm_page_t, 246 vm_prot_t); 247 static void pmap_remove_section(pmap_t, vm_offset_t); 248 249 static int pmap_clearbit(struct vm_page *, u_int); 250 251 static struct l2_bucket *pmap_get_l2_bucket(pmap_t, vm_offset_t); 252 static struct l2_bucket *pmap_alloc_l2_bucket(pmap_t, vm_offset_t); 253 static void pmap_free_l2_bucket(pmap_t, struct l2_bucket *, u_int); 254 static vm_offset_t kernel_pt_lookup(vm_paddr_t); 255 256 static MALLOC_DEFINE(M_VMPMAP, "pmap", "PMAP L1"); 257 258 vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 259 vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 260 vm_offset_t pmap_curmaxkvaddr; 261 vm_paddr_t kernel_l1pa; 262 263 vm_offset_t kernel_vm_end = 0; 264 265 vm_offset_t vm_max_kernel_address; 266 267 struct pmap kernel_pmap_store; 268 269 /* 270 * Resources for quickly copying and zeroing pages using virtual address space 271 * and page table entries that are pre-allocated per-CPU by pmap_init(). 272 */ 273 struct czpages { 274 struct mtx lock; 275 pt_entry_t *srcptep; 276 pt_entry_t *dstptep; 277 vm_offset_t srcva; 278 vm_offset_t dstva; 279 }; 280 static struct czpages cpu_czpages[MAXCPU]; 281 282 static void pmap_init_l1(struct l1_ttable *, pd_entry_t *); 283 /* 284 * These routines are called when the CPU type is identified to set up 285 * the PTE prototypes, cache modes, etc. 286 * 287 * The variables are always here, just in case LKMs need to reference 288 * them (though, they shouldn't). 
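 *
 * Illustrative example of how these prototypes are consumed (the call
 * sites are the ones further down in this file): whenever the pmap needs
 * a mapping with page-table cache attributes, it rewrites the PTE in
 * place, e.g.
 *
 *     *ptep = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt;
 *     PTE_SYNC(ptep);
 *
 * which is the pattern used by pmap_l2ptp_ctor() and
 * pmap_set_pt_cache_mode() below.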
289 */ 290 static void pmap_set_prot(pt_entry_t *pte, vm_prot_t prot, uint8_t user); 291 pt_entry_t pte_l1_s_cache_mode; 292 pt_entry_t pte_l1_s_cache_mode_pt; 293 294 pt_entry_t pte_l2_l_cache_mode; 295 pt_entry_t pte_l2_l_cache_mode_pt; 296 297 pt_entry_t pte_l2_s_cache_mode; 298 pt_entry_t pte_l2_s_cache_mode_pt; 299 300 struct msgbuf *msgbufp = 0; 301 302 /* 303 * Crashdump maps. 304 */ 305 static caddr_t crashdumpmap; 306 307 extern void bcopy_page(vm_offset_t, vm_offset_t); 308 extern void bzero_page(vm_offset_t); 309 310 char *_tmppt; 311 312 /* 313 * Metadata for L1 translation tables. 314 */ 315 struct l1_ttable { 316 /* Entry on the L1 Table list */ 317 SLIST_ENTRY(l1_ttable) l1_link; 318 319 /* Entry on the L1 Least Recently Used list */ 320 TAILQ_ENTRY(l1_ttable) l1_lru; 321 322 /* Track how many domains are allocated from this L1 */ 323 volatile u_int l1_domain_use_count; 324 325 /* 326 * A free-list of domain numbers for this L1. 327 * We avoid using ffs() and a bitmap to track domains since ffs() 328 * is slow on ARM. 329 */ 330 u_int8_t l1_domain_first; 331 u_int8_t l1_domain_free[PMAP_DOMAINS]; 332 333 /* Physical address of this L1 page table */ 334 vm_paddr_t l1_physaddr; 335 336 /* KVA of this L1 page table */ 337 pd_entry_t *l1_kva; 338 }; 339 340 /* 341 * Convert a virtual address into its L1 table index. That is, the 342 * index used to locate the L2 descriptor table pointer in an L1 table. 343 * This is basically used to index l1->l1_kva[]. 344 * 345 * Each L2 descriptor table represents 1MB of VA space. 346 */ 347 #define L1_IDX(va) (((vm_offset_t)(va)) >> L1_S_SHIFT) 348 349 /* 350 * L1 Page Tables are tracked using a Least Recently Used list. 351 * - New L1s are allocated from the HEAD. 352 * - Freed L1s are added to the TAIL. 353 * - Recently accessed L1s (where an 'access' is some change to one of 354 * the userland pmaps which owns this L1) are moved to the TAIL. 355 */ 356 static TAILQ_HEAD(, l1_ttable) l1_lru_list; 357 /* 358 * A list of all L1 tables 359 */ 360 static SLIST_HEAD(, l1_ttable) l1_list; 361 static struct mtx l1_lru_lock; 362 363 /* 364 * The l2_dtable tracks L2_BUCKET_SIZE worth of L1 slots. 365 * 366 * This is normally 16MB worth of L2 page descriptors for any given pmap. 367 * Reference counts are maintained for L2 descriptors so they can be 368 * freed when empty. 369 */ 370 struct l2_dtable { 371 /* The number of L2 page descriptors allocated to this l2_dtable */ 372 u_int l2_occupancy; 373 374 /* List of L2 page descriptors */ 375 struct l2_bucket { 376 pt_entry_t *l2b_kva; /* KVA of L2 Descriptor Table */ 377 vm_paddr_t l2b_phys; /* Physical address of same */ 378 u_short l2b_l1idx; /* This L2 table's L1 index */ 379 u_short l2b_occupancy; /* How many active descriptors */ 380 } l2_bucket[L2_BUCKET_SIZE]; 381 }; 382 383 /* pmap_kenter_internal flags */ 384 #define KENTER_CACHE 0x1 385 #define KENTER_DEVICE 0x2 386 #define KENTER_USER 0x4 387 388 /* 389 * Given an L1 table index, calculate the corresponding l2_dtable index 390 * and bucket index within the l2_dtable. 391 */ 392 #define L2_IDX(l1idx) (((l1idx) >> L2_BUCKET_LOG2) & \ 393 (L2_SIZE - 1)) 394 #define L2_BUCKET(l1idx) ((l1idx) & (L2_BUCKET_SIZE - 1)) 395 396 /* 397 * Given a virtual address, this macro returns the 398 * virtual address required to drop into the next L2 bucket. 399 */ 400 #define L2_NEXT_BUCKET(va) (((va) & L1_S_FRAME) + L1_S_SIZE) 401 402 /* 403 * We try to map the page tables write-through, if possible. 
However, not 404 * all CPUs have a write-through cache mode, so on those we have to sync 405 * the cache when we frob page tables. 406 * 407 * We try to evaluate this at compile time, if possible. However, it's 408 * not always possible to do that, hence this run-time var. 409 */ 410 int pmap_needs_pte_sync; 411 412 /* 413 * Macro to determine if a mapping might be resident in the 414 * instruction cache and/or TLB 415 */ 416 #define PTE_BEEN_EXECD(pte) (L2_S_EXECUTABLE(pte) && L2_S_REFERENCED(pte)) 417 418 /* 419 * Macro to determine if a mapping might be resident in the 420 * data cache and/or TLB 421 */ 422 #define PTE_BEEN_REFD(pte) (L2_S_REFERENCED(pte)) 423 424 #ifndef PMAP_SHPGPERPROC 425 #define PMAP_SHPGPERPROC 200 426 #endif 427 428 #define pmap_is_current(pm) ((pm) == pmap_kernel() || \ 429 curproc->p_vmspace->vm_map.pmap == (pm)) 430 431 /* 432 * Data for the pv entry allocation mechanism 433 */ 434 static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); 435 static int pv_entry_count, pv_entry_max, pv_entry_high_water; 436 static struct md_page *pv_table; 437 static int shpgperproc = PMAP_SHPGPERPROC; 438 439 struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */ 440 int pv_maxchunks; /* How many chunks we have KVA for */ 441 vm_offset_t pv_vafree; /* Freelist stored in the PTE */ 442 443 static __inline struct pv_chunk * 444 pv_to_chunk(pv_entry_t pv) 445 { 446 447 return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); 448 } 449 450 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) 451 452 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); 453 CTASSERT(_NPCM == 8); 454 CTASSERT(_NPCPV == 252); 455 456 #define PC_FREE0_6 0xfffffffful /* Free values for index 0 through 6 */ 457 #define PC_FREE7 0x0ffffffful /* Free values for index 7 */ 458 459 static const uint32_t pc_freemask[_NPCM] = { 460 PC_FREE0_6, PC_FREE0_6, PC_FREE0_6, 461 PC_FREE0_6, PC_FREE0_6, PC_FREE0_6, 462 PC_FREE0_6, PC_FREE7 463 }; 464 465 static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); 466 467 /* Superpages utilization enabled = 1 / disabled = 0 */ 468 static int sp_enabled = 1; 469 SYSCTL_INT(_vm_pmap, OID_AUTO, sp_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &sp_enabled, 0, 470 "Are large page mappings enabled?"); 471 472 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, 473 "Current number of pv entries"); 474 475 #ifdef PV_STATS 476 static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; 477 478 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, 479 "Current number of pv entry chunks"); 480 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, 481 "Current number of pv entry chunks allocated"); 482 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, 483 "Current number of pv entry chunks frees"); 484 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, 485 "Number of times tried to get a chunk page but failed."); 486 487 static long pv_entry_frees, pv_entry_allocs; 488 static int pv_entry_spare; 489 490 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, 491 "Current number of pv entry frees"); 492 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, 493 "Current number of pv entry allocs"); 494 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, 495 "Current number of spare pv entries"); 496 #endif 
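/*
 * Worked example of the pv_chunk free-mask layout asserted earlier in this
 * file (illustrative sketch; the pc_map[]/pc_pventry[] field names are the
 * conventional ones and are assumed here): a chunk is exactly one
 * PAGE_SIZE (4 KB) page whose header is followed by _NPCPV (252) pv_entry
 * slots, tracked by _NPCM (8) 32-bit mask words.  The first seven words
 * cover 7 * 32 = 224 entries and are all-ones when fully free (PC_FREE0_6);
 * the last word only has 252 - 224 = 28 valid bits, hence
 * PC_FREE7 == 0x0fffffff.  Allocating one entry from a chunk then amounts
 * to scanning for the first set bit:
 *
 *     int field, bit;
 *
 *     for (field = 0; field < _NPCM; field++) {
 *         if (pc->pc_map[field] != 0) {
 *             bit = ffs(pc->pc_map[field]) - 1;
 *             pc->pc_map[field] &= ~(1ul << bit);
 *             pv = &pc->pc_pventry[field * 32 + bit];
 *             break;
 *         }
 *     }
 */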
497 498 uma_zone_t l2zone; 499 static uma_zone_t l2table_zone; 500 static vm_offset_t pmap_kernel_l2dtable_kva; 501 static vm_offset_t pmap_kernel_l2ptp_kva; 502 static vm_paddr_t pmap_kernel_l2ptp_phys; 503 static struct rwlock pvh_global_lock; 504 505 int l1_mem_types[] = { 506 ARM_L1S_STRONG_ORD, 507 ARM_L1S_DEVICE_NOSHARE, 508 ARM_L1S_DEVICE_SHARE, 509 ARM_L1S_NRML_NOCACHE, 510 ARM_L1S_NRML_IWT_OWT, 511 ARM_L1S_NRML_IWB_OWB, 512 ARM_L1S_NRML_IWBA_OWBA 513 }; 514 515 int l2l_mem_types[] = { 516 ARM_L2L_STRONG_ORD, 517 ARM_L2L_DEVICE_NOSHARE, 518 ARM_L2L_DEVICE_SHARE, 519 ARM_L2L_NRML_NOCACHE, 520 ARM_L2L_NRML_IWT_OWT, 521 ARM_L2L_NRML_IWB_OWB, 522 ARM_L2L_NRML_IWBA_OWBA 523 }; 524 525 int l2s_mem_types[] = { 526 ARM_L2S_STRONG_ORD, 527 ARM_L2S_DEVICE_NOSHARE, 528 ARM_L2S_DEVICE_SHARE, 529 ARM_L2S_NRML_NOCACHE, 530 ARM_L2S_NRML_IWT_OWT, 531 ARM_L2S_NRML_IWB_OWB, 532 ARM_L2S_NRML_IWBA_OWBA 533 }; 534 535 /* 536 * This list exists for the benefit of pmap_map_chunk(). It keeps track 537 * of the kernel L2 tables during bootstrap, so that pmap_map_chunk() can 538 * find them as necessary. 539 * 540 * Note that the data on this list MUST remain valid after initarm() returns, 541 * as pmap_bootstrap() uses it to contruct L2 table metadata. 542 */ 543 SLIST_HEAD(, pv_addr) kernel_pt_list = SLIST_HEAD_INITIALIZER(kernel_pt_list); 544 545 static void 546 pmap_init_l1(struct l1_ttable *l1, pd_entry_t *l1pt) 547 { 548 int i; 549 550 l1->l1_kva = l1pt; 551 l1->l1_domain_use_count = 0; 552 l1->l1_domain_first = 0; 553 554 for (i = 0; i < PMAP_DOMAINS; i++) 555 l1->l1_domain_free[i] = i + 1; 556 557 /* 558 * Copy the kernel's L1 entries to each new L1. 559 */ 560 if (l1pt != pmap_kernel()->pm_l1->l1_kva) 561 memcpy(l1pt, pmap_kernel()->pm_l1->l1_kva, L1_TABLE_SIZE); 562 563 if ((l1->l1_physaddr = pmap_extract(pmap_kernel(), (vm_offset_t)l1pt)) == 0) 564 panic("pmap_init_l1: can't get PA of L1 at %p", l1pt); 565 SLIST_INSERT_HEAD(&l1_list, l1, l1_link); 566 TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru); 567 } 568 569 static vm_offset_t 570 kernel_pt_lookup(vm_paddr_t pa) 571 { 572 struct pv_addr *pv; 573 574 SLIST_FOREACH(pv, &kernel_pt_list, pv_list) { 575 if (pv->pv_pa == pa) 576 return (pv->pv_va); 577 } 578 return (0); 579 } 580 581 void 582 pmap_pte_init_mmu_v6(void) 583 { 584 585 if (PTE_PAGETABLE >= 3) 586 pmap_needs_pte_sync = 1; 587 pte_l1_s_cache_mode = l1_mem_types[PTE_CACHE]; 588 pte_l2_l_cache_mode = l2l_mem_types[PTE_CACHE]; 589 pte_l2_s_cache_mode = l2s_mem_types[PTE_CACHE]; 590 591 pte_l1_s_cache_mode_pt = l1_mem_types[PTE_PAGETABLE]; 592 pte_l2_l_cache_mode_pt = l2l_mem_types[PTE_PAGETABLE]; 593 pte_l2_s_cache_mode_pt = l2s_mem_types[PTE_PAGETABLE]; 594 595 } 596 597 /* 598 * Allocate an L1 translation table for the specified pmap. 599 * This is called at pmap creation time. 600 */ 601 static void 602 pmap_alloc_l1(pmap_t pmap) 603 { 604 struct l1_ttable *l1; 605 u_int8_t domain; 606 607 /* 608 * Remove the L1 at the head of the LRU list 609 */ 610 mtx_lock(&l1_lru_lock); 611 l1 = TAILQ_FIRST(&l1_lru_list); 612 TAILQ_REMOVE(&l1_lru_list, l1, l1_lru); 613 614 /* 615 * Pick the first available domain number, and update 616 * the link to the next number. 617 */ 618 domain = l1->l1_domain_first; 619 l1->l1_domain_first = l1->l1_domain_free[domain]; 620 621 /* 622 * If there are still free domain numbers in this L1, 623 * put it back on the TAIL of the LRU list. 
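 *
 * An illustrative walk-through of the domain free-list manipulated above:
 * pmap_init_l1() seeds l1_domain_free[i] = i + 1 with l1_domain_first = 0,
 * so the free list initially reads 0 -> 1 -> 2 -> ...  The first allocation
 * from a fresh L1 therefore takes index 0 and advances l1_domain_first to
 * l1_domain_free[0] == 1, and the pmap records pm_domain = 0 + 1 = 1; user
 * pmaps thus end up with domain numbers 1..PMAP_DOMAINS while the kernel
 * keeps its own fixed domain (PMAP_DOMAIN_KERNEL).  pmap_free_l1() later
 * pushes the number back onto the list with
 *
 *     l1->l1_domain_free[pmap->pm_domain - 1] = l1->l1_domain_first;
 *     l1->l1_domain_first = pmap->pm_domain - 1;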
624 */ 625 if (++l1->l1_domain_use_count < PMAP_DOMAINS) 626 TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru); 627 628 mtx_unlock(&l1_lru_lock); 629 630 /* 631 * Fix up the relevant bits in the pmap structure 632 */ 633 pmap->pm_l1 = l1; 634 pmap->pm_domain = domain + 1; 635 } 636 637 /* 638 * Free an L1 translation table. 639 * This is called at pmap destruction time. 640 */ 641 static void 642 pmap_free_l1(pmap_t pmap) 643 { 644 struct l1_ttable *l1 = pmap->pm_l1; 645 646 mtx_lock(&l1_lru_lock); 647 648 /* 649 * If this L1 is currently on the LRU list, remove it. 650 */ 651 if (l1->l1_domain_use_count < PMAP_DOMAINS) 652 TAILQ_REMOVE(&l1_lru_list, l1, l1_lru); 653 654 /* 655 * Free up the domain number which was allocated to the pmap 656 */ 657 l1->l1_domain_free[pmap->pm_domain - 1] = l1->l1_domain_first; 658 l1->l1_domain_first = pmap->pm_domain - 1; 659 l1->l1_domain_use_count--; 660 661 /* 662 * The L1 now must have at least 1 free domain, so add 663 * it back to the LRU list. If the use count is zero, 664 * put it at the head of the list, otherwise it goes 665 * to the tail. 666 */ 667 if (l1->l1_domain_use_count == 0) { 668 TAILQ_INSERT_HEAD(&l1_lru_list, l1, l1_lru); 669 } else 670 TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru); 671 672 mtx_unlock(&l1_lru_lock); 673 } 674 675 /* 676 * Returns a pointer to the L2 bucket associated with the specified pmap 677 * and VA, or NULL if no L2 bucket exists for the address. 678 */ 679 static PMAP_INLINE struct l2_bucket * 680 pmap_get_l2_bucket(pmap_t pmap, vm_offset_t va) 681 { 682 struct l2_dtable *l2; 683 struct l2_bucket *l2b; 684 u_short l1idx; 685 686 l1idx = L1_IDX(va); 687 688 if ((l2 = pmap->pm_l2[L2_IDX(l1idx)]) == NULL || 689 (l2b = &l2->l2_bucket[L2_BUCKET(l1idx)])->l2b_kva == NULL) 690 return (NULL); 691 692 return (l2b); 693 } 694 695 /* 696 * Returns a pointer to the L2 bucket associated with the specified pmap 697 * and VA. 698 * 699 * If no L2 bucket exists, perform the necessary allocations to put an L2 700 * bucket/page table in place. 701 * 702 * Note that if a new L2 bucket/page was allocated, the caller *must* 703 * increment the bucket occupancy counter appropriately *before* 704 * releasing the pmap's lock to ensure no other thread or cpu deallocates 705 * the bucket/page in the meantime. 706 */ 707 static struct l2_bucket * 708 pmap_alloc_l2_bucket(pmap_t pmap, vm_offset_t va) 709 { 710 struct l2_dtable *l2; 711 struct l2_bucket *l2b; 712 u_short l1idx; 713 714 l1idx = L1_IDX(va); 715 716 PMAP_ASSERT_LOCKED(pmap); 717 rw_assert(&pvh_global_lock, RA_WLOCKED); 718 if ((l2 = pmap->pm_l2[L2_IDX(l1idx)]) == NULL) { 719 /* 720 * No mapping at this address, as there is 721 * no entry in the L1 table. 722 * Need to allocate a new l2_dtable. 723 */ 724 PMAP_UNLOCK(pmap); 725 rw_wunlock(&pvh_global_lock); 726 if ((l2 = uma_zalloc(l2table_zone, M_NOWAIT)) == NULL) { 727 rw_wlock(&pvh_global_lock); 728 PMAP_LOCK(pmap); 729 return (NULL); 730 } 731 rw_wlock(&pvh_global_lock); 732 PMAP_LOCK(pmap); 733 if (pmap->pm_l2[L2_IDX(l1idx)] != NULL) { 734 /* 735 * Someone already allocated the l2_dtable while 736 * we were doing the same. 737 */ 738 uma_zfree(l2table_zone, l2); 739 l2 = pmap->pm_l2[L2_IDX(l1idx)]; 740 } else { 741 bzero(l2, sizeof(*l2)); 742 /* 743 * Link it into the parent pmap 744 */ 745 pmap->pm_l2[L2_IDX(l1idx)] = l2; 746 } 747 } 748 749 l2b = &l2->l2_bucket[L2_BUCKET(l1idx)]; 750 751 /* 752 * Fetch pointer to the L2 page table associated with the address. 
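 *
 * Worked example of the index arithmetic used throughout this function
 * (illustrative only; it assumes the usual constants L1_S_SHIFT = 20,
 * L2_BUCKET_LOG2 = 4 and L2_BUCKET_SIZE = 16).  For a hypothetical
 * va = 0x40712345:
 *
 *     l1idx            = L1_IDX(va) = 0x40712345 >> 20      = 0x407
 *     L2_IDX(l1idx)    = (0x407 >> 4) & (L2_SIZE - 1)       = 0x40
 *     L2_BUCKET(l1idx) = 0x407 & 0xf                        = 0x7
 *
 * so pm_l2[0x40] is the l2_dtable covering this 16 MB region, and its
 * bucket 7 owns the single 1 MB slot that contains va.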
753 */ 754 if (l2b->l2b_kva == NULL) { 755 pt_entry_t *ptep; 756 757 /* 758 * No L2 page table has been allocated. Chances are, this 759 * is because we just allocated the l2_dtable, above. 760 */ 761 l2->l2_occupancy++; 762 PMAP_UNLOCK(pmap); 763 rw_wunlock(&pvh_global_lock); 764 ptep = uma_zalloc(l2zone, M_NOWAIT); 765 rw_wlock(&pvh_global_lock); 766 PMAP_LOCK(pmap); 767 if (l2b->l2b_kva != 0) { 768 /* We lost the race. */ 769 l2->l2_occupancy--; 770 uma_zfree(l2zone, ptep); 771 return (l2b); 772 } 773 l2b->l2b_phys = vtophys(ptep); 774 if (ptep == NULL) { 775 /* 776 * Oops, no more L2 page tables available at this 777 * time. We may need to deallocate the l2_dtable 778 * if we allocated a new one above. 779 */ 780 l2->l2_occupancy--; 781 if (l2->l2_occupancy == 0) { 782 pmap->pm_l2[L2_IDX(l1idx)] = NULL; 783 uma_zfree(l2table_zone, l2); 784 } 785 return (NULL); 786 } 787 788 l2b->l2b_kva = ptep; 789 l2b->l2b_l1idx = l1idx; 790 } 791 792 return (l2b); 793 } 794 795 static PMAP_INLINE void 796 pmap_free_l2_ptp(pt_entry_t *l2) 797 { 798 uma_zfree(l2zone, l2); 799 } 800 /* 801 * One or more mappings in the specified L2 descriptor table have just been 802 * invalidated. 803 * 804 * Garbage collect the metadata and descriptor table itself if necessary. 805 * 806 * The pmap lock must be acquired when this is called (not necessary 807 * for the kernel pmap). 808 */ 809 static void 810 pmap_free_l2_bucket(pmap_t pmap, struct l2_bucket *l2b, u_int count) 811 { 812 struct l2_dtable *l2; 813 pd_entry_t *pl1pd, l1pd; 814 pt_entry_t *ptep; 815 u_short l1idx; 816 817 818 /* 819 * Update the bucket's reference count according to how many 820 * PTEs the caller has just invalidated. 821 */ 822 l2b->l2b_occupancy -= count; 823 824 /* 825 * Note: 826 * 827 * Level 2 page tables allocated to the kernel pmap are never freed 828 * as that would require checking all Level 1 page tables and 829 * removing any references to the Level 2 page table. See also the 830 * comment elsewhere about never freeing bootstrap L2 descriptors. 831 * 832 * We make do with just invalidating the mapping in the L2 table. 833 * 834 * This isn't really a big deal in practice and, in fact, leads 835 * to a performance win over time as we don't need to continually 836 * alloc/free. 837 */ 838 if (l2b->l2b_occupancy > 0 || pmap == pmap_kernel()) 839 return; 840 841 /* 842 * There are no more valid mappings in this level 2 page table. 843 * Go ahead and NULL-out the pointer in the bucket, then 844 * free the page table. 845 */ 846 l1idx = l2b->l2b_l1idx; 847 ptep = l2b->l2b_kva; 848 l2b->l2b_kva = NULL; 849 850 pl1pd = &pmap->pm_l1->l1_kva[l1idx]; 851 852 /* 853 * If the L1 slot matches the pmap's domain 854 * number, then invalidate it. 855 */ 856 l1pd = *pl1pd & (L1_TYPE_MASK | L1_C_DOM_MASK); 857 if (l1pd == (L1_C_DOM(pmap->pm_domain) | L1_TYPE_C)) { 858 *pl1pd = 0; 859 PTE_SYNC(pl1pd); 860 cpu_tlb_flushD_SE((vm_offset_t)ptep); 861 cpu_cpwait(); 862 } 863 864 /* 865 * Release the L2 descriptor table back to the pool cache. 866 */ 867 pmap_free_l2_ptp(ptep); 868 869 /* 870 * Update the reference count in the associated l2_dtable 871 */ 872 l2 = pmap->pm_l2[L2_IDX(l1idx)]; 873 if (--l2->l2_occupancy > 0) 874 return; 875 876 /* 877 * There are no more valid mappings in any of the Level 1 878 * slots managed by this l2_dtable. Go ahead and NULL-out 879 * the pointer in the parent pmap and free the l2_dtable. 
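 *
 * For example: removing the final PTE of a 1 MB slot drops
 * l2b->l2b_occupancy to zero, which released that slot's L2 page table
 * back to l2zone above; if that slot was also the last occupied one of
 * the L2_BUCKET_SIZE slots tracked by this l2_dtable, l2->l2_occupancy
 * reaches zero here and the l2_dtable itself goes back to l2table_zone.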
880 */ 881 pmap->pm_l2[L2_IDX(l1idx)] = NULL; 882 uma_zfree(l2table_zone, l2); 883 } 884 885 /* 886 * Pool cache constructors for L2 descriptor tables, metadata and pmap 887 * structures. 888 */ 889 static int 890 pmap_l2ptp_ctor(void *mem, int size, void *arg, int flags) 891 { 892 struct l2_bucket *l2b; 893 pt_entry_t *ptep, pte; 894 vm_offset_t va = (vm_offset_t)mem & ~PAGE_MASK; 895 896 /* 897 * The mappings for these page tables were initially made using 898 * pmap_kenter() by the pool subsystem. Therefore, the cache- 899 * mode will not be right for page table mappings. To avoid 900 * polluting the pmap_kenter() code with a special case for 901 * page tables, we simply fix up the cache-mode here if it's not 902 * correct. 903 */ 904 l2b = pmap_get_l2_bucket(pmap_kernel(), va); 905 ptep = &l2b->l2b_kva[l2pte_index(va)]; 906 pte = *ptep; 907 908 cpu_idcache_wbinv_range(va, PAGE_SIZE); 909 pmap_l2cache_wbinv_range(va, pte & L2_S_FRAME, PAGE_SIZE); 910 if ((pte & L2_S_CACHE_MASK) != pte_l2_s_cache_mode_pt) { 911 /* 912 * Page tables must have the cache-mode set to 913 * Write-Thru. 914 */ 915 *ptep = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt; 916 PTE_SYNC(ptep); 917 cpu_tlb_flushD_SE(va); 918 cpu_cpwait(); 919 } 920 921 memset(mem, 0, L2_TABLE_SIZE_REAL); 922 return (0); 923 } 924 925 /* 926 * Modify pte bits for all ptes corresponding to the given physical address. 927 * We use `maskbits' rather than `clearbits' because we're always passing 928 * constants and the latter would require an extra inversion at run-time. 929 */ 930 static int 931 pmap_clearbit(struct vm_page *m, u_int maskbits) 932 { 933 struct l2_bucket *l2b; 934 struct pv_entry *pv, *pve, *next_pv; 935 struct md_page *pvh; 936 pd_entry_t *pl1pd; 937 pt_entry_t *ptep, npte, opte; 938 pmap_t pmap; 939 vm_offset_t va; 940 u_int oflags; 941 int count = 0; 942 943 rw_wlock(&pvh_global_lock); 944 if ((m->flags & PG_FICTITIOUS) != 0) 945 goto small_mappings; 946 947 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 948 TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { 949 va = pv->pv_va; 950 pmap = PV_PMAP(pv); 951 PMAP_LOCK(pmap); 952 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; 953 KASSERT((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO, 954 ("pmap_clearbit: valid section mapping expected")); 955 if ((maskbits & PVF_WRITE) && (pv->pv_flags & PVF_WRITE)) 956 (void)pmap_demote_section(pmap, va); 957 else if ((maskbits & PVF_REF) && L1_S_REFERENCED(*pl1pd)) { 958 if (pmap_demote_section(pmap, va)) { 959 if ((pv->pv_flags & PVF_WIRED) == 0) { 960 /* 961 * Remove the mapping to a single page 962 * so that a subsequent access may 963 * repromote. Since the underlying 964 * l2_bucket is fully populated, this 965 * removal never frees an entire 966 * l2_bucket. 967 */ 968 va += (VM_PAGE_TO_PHYS(m) & 969 L1_S_OFFSET); 970 l2b = pmap_get_l2_bucket(pmap, va); 971 KASSERT(l2b != NULL, 972 ("pmap_clearbit: no l2 bucket for " 973 "va 0x%#x, pmap 0x%p", va, pmap)); 974 ptep = &l2b->l2b_kva[l2pte_index(va)]; 975 *ptep = 0; 976 PTE_SYNC(ptep); 977 pmap_free_l2_bucket(pmap, l2b, 1); 978 pve = pmap_remove_pv(m, pmap, va); 979 KASSERT(pve != NULL, ("pmap_clearbit: " 980 "no PV entry for managed mapping")); 981 pmap_free_pv_entry(pmap, pve); 982 983 } 984 } 985 } else if ((maskbits & PVF_MOD) && L1_S_WRITABLE(*pl1pd)) { 986 if (pmap_demote_section(pmap, va)) { 987 if ((pv->pv_flags & PVF_WIRED) == 0) { 988 /* 989 * Write protect the mapping to a 990 * single page so that a subsequent 991 * write access may repromote. 
992 */ 993 va += (VM_PAGE_TO_PHYS(m) & 994 L1_S_OFFSET); 995 l2b = pmap_get_l2_bucket(pmap, va); 996 KASSERT(l2b != NULL, 997 ("pmap_clearbit: no l2 bucket for " 998 "va 0x%#x, pmap 0x%p", va, pmap)); 999 ptep = &l2b->l2b_kva[l2pte_index(va)]; 1000 if ((*ptep & L2_S_PROTO) != 0) { 1001 pve = pmap_find_pv(&m->md, 1002 pmap, va); 1003 KASSERT(pve != NULL, 1004 ("pmap_clearbit: no PV " 1005 "entry for managed mapping")); 1006 pve->pv_flags &= ~PVF_WRITE; 1007 *ptep |= L2_APX; 1008 PTE_SYNC(ptep); 1009 } 1010 } 1011 } 1012 } 1013 PMAP_UNLOCK(pmap); 1014 } 1015 1016 small_mappings: 1017 if (TAILQ_EMPTY(&m->md.pv_list)) { 1018 rw_wunlock(&pvh_global_lock); 1019 return (0); 1020 } 1021 1022 /* 1023 * Loop over all current mappings setting/clearing as appropos 1024 */ 1025 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 1026 va = pv->pv_va; 1027 pmap = PV_PMAP(pv); 1028 oflags = pv->pv_flags; 1029 pv->pv_flags &= ~maskbits; 1030 1031 PMAP_LOCK(pmap); 1032 1033 l2b = pmap_get_l2_bucket(pmap, va); 1034 KASSERT(l2b != NULL, ("pmap_clearbit: no l2 bucket for " 1035 "va 0x%#x, pmap 0x%p", va, pmap)); 1036 1037 ptep = &l2b->l2b_kva[l2pte_index(va)]; 1038 npte = opte = *ptep; 1039 1040 if (maskbits & (PVF_WRITE | PVF_MOD)) { 1041 /* make the pte read only */ 1042 npte |= L2_APX; 1043 } 1044 1045 if (maskbits & PVF_REF) { 1046 /* 1047 * Clear referenced flag in PTE so that we 1048 * will take a flag fault the next time the mapping 1049 * is referenced. 1050 */ 1051 npte &= ~L2_S_REF; 1052 } 1053 1054 CTR4(KTR_PMAP,"clearbit: pmap:%p bits:%x pte:%x->%x", 1055 pmap, maskbits, opte, npte); 1056 if (npte != opte) { 1057 count++; 1058 *ptep = npte; 1059 PTE_SYNC(ptep); 1060 /* Flush the TLB entry if a current pmap. */ 1061 if (PTE_BEEN_EXECD(opte)) 1062 cpu_tlb_flushID_SE(pv->pv_va); 1063 else if (PTE_BEEN_REFD(opte)) 1064 cpu_tlb_flushD_SE(pv->pv_va); 1065 cpu_cpwait(); 1066 } 1067 1068 PMAP_UNLOCK(pmap); 1069 1070 } 1071 1072 if (maskbits & PVF_WRITE) 1073 vm_page_aflag_clear(m, PGA_WRITEABLE); 1074 rw_wunlock(&pvh_global_lock); 1075 return (count); 1076 } 1077 1078 /* 1079 * main pv_entry manipulation functions: 1080 * pmap_enter_pv: enter a mapping onto a vm_page list 1081 * pmap_remove_pv: remove a mappiing from a vm_page list 1082 * 1083 * NOTE: pmap_enter_pv expects to lock the pvh itself 1084 * pmap_remove_pv expects the caller to lock the pvh before calling 1085 */ 1086 1087 /* 1088 * pmap_enter_pv: enter a mapping onto a vm_page's PV list 1089 * 1090 * => caller should hold the proper lock on pvh_global_lock 1091 * => caller should have pmap locked 1092 * => we will (someday) gain the lock on the vm_page's PV list 1093 * => caller should adjust ptp's wire_count before calling 1094 * => caller should not adjust pmap's wire_count 1095 */ 1096 static void 1097 pmap_enter_pv(struct vm_page *m, struct pv_entry *pve, pmap_t pmap, 1098 vm_offset_t va, u_int flags) 1099 { 1100 1101 rw_assert(&pvh_global_lock, RA_WLOCKED); 1102 1103 PMAP_ASSERT_LOCKED(pmap); 1104 pve->pv_va = va; 1105 pve->pv_flags = flags; 1106 1107 TAILQ_INSERT_HEAD(&m->md.pv_list, pve, pv_list); 1108 if (pve->pv_flags & PVF_WIRED) 1109 ++pmap->pm_stats.wired_count; 1110 } 1111 1112 /* 1113 * 1114 * pmap_find_pv: Find a pv entry 1115 * 1116 * => caller should hold lock on vm_page 1117 */ 1118 static PMAP_INLINE struct pv_entry * 1119 pmap_find_pv(struct md_page *md, pmap_t pmap, vm_offset_t va) 1120 { 1121 struct pv_entry *pv; 1122 1123 rw_assert(&pvh_global_lock, RA_WLOCKED); 1124 TAILQ_FOREACH(pv, &md->pv_list, pv_list) 1125 if (pmap == 
PV_PMAP(pv) && va == pv->pv_va) 1126 break; 1127 1128 return (pv); 1129 } 1130 1131 /* 1132 * vector_page_setprot: 1133 * 1134 * Manipulate the protection of the vector page. 1135 */ 1136 void 1137 vector_page_setprot(int prot) 1138 { 1139 struct l2_bucket *l2b; 1140 pt_entry_t *ptep; 1141 1142 l2b = pmap_get_l2_bucket(pmap_kernel(), vector_page); 1143 1144 ptep = &l2b->l2b_kva[l2pte_index(vector_page)]; 1145 /* 1146 * Set referenced flag. 1147 * Vectors' page is always desired 1148 * to be allowed to reside in TLB. 1149 */ 1150 *ptep |= L2_S_REF; 1151 1152 pmap_set_prot(ptep, prot|VM_PROT_EXECUTE, 0); 1153 PTE_SYNC(ptep); 1154 cpu_tlb_flushID_SE(vector_page); 1155 cpu_cpwait(); 1156 } 1157 1158 static void 1159 pmap_set_prot(pt_entry_t *ptep, vm_prot_t prot, uint8_t user) 1160 { 1161 1162 *ptep &= ~(L2_S_PROT_MASK | L2_XN); 1163 1164 if (!(prot & VM_PROT_EXECUTE)) 1165 *ptep |= L2_XN; 1166 1167 /* Set defaults first - kernel read access */ 1168 *ptep |= L2_APX; 1169 *ptep |= L2_S_PROT_R; 1170 /* Now tune APs as desired */ 1171 if (user) 1172 *ptep |= L2_S_PROT_U; 1173 1174 if (prot & VM_PROT_WRITE) 1175 *ptep &= ~(L2_APX); 1176 } 1177 1178 /* 1179 * pmap_remove_pv: try to remove a mapping from a pv_list 1180 * 1181 * => caller should hold proper lock on pmap_main_lock 1182 * => pmap should be locked 1183 * => caller should hold lock on vm_page [so that attrs can be adjusted] 1184 * => caller should adjust ptp's wire_count and free PTP if needed 1185 * => caller should NOT adjust pmap's wire_count 1186 * => we return the removed pve 1187 */ 1188 static struct pv_entry * 1189 pmap_remove_pv(struct vm_page *m, pmap_t pmap, vm_offset_t va) 1190 { 1191 struct pv_entry *pve; 1192 1193 rw_assert(&pvh_global_lock, RA_WLOCKED); 1194 PMAP_ASSERT_LOCKED(pmap); 1195 1196 pve = pmap_find_pv(&m->md, pmap, va); /* find corresponding pve */ 1197 if (pve != NULL) { 1198 TAILQ_REMOVE(&m->md.pv_list, pve, pv_list); 1199 if (pve->pv_flags & PVF_WIRED) 1200 --pmap->pm_stats.wired_count; 1201 } 1202 if (TAILQ_EMPTY(&m->md.pv_list)) 1203 vm_page_aflag_clear(m, PGA_WRITEABLE); 1204 1205 return(pve); /* return removed pve */ 1206 } 1207 1208 /* 1209 * 1210 * pmap_modify_pv: Update pv flags 1211 * 1212 * => caller should hold lock on vm_page [so that attrs can be adjusted] 1213 * => caller should NOT adjust pmap's wire_count 1214 * => we return the old flags 1215 * 1216 * Modify a physical-virtual mapping in the pv table 1217 */ 1218 static u_int 1219 pmap_modify_pv(struct vm_page *m, pmap_t pmap, vm_offset_t va, 1220 u_int clr_mask, u_int set_mask) 1221 { 1222 struct pv_entry *npv; 1223 u_int flags, oflags; 1224 1225 PMAP_ASSERT_LOCKED(pmap); 1226 rw_assert(&pvh_global_lock, RA_WLOCKED); 1227 if ((npv = pmap_find_pv(&m->md, pmap, va)) == NULL) 1228 return (0); 1229 1230 /* 1231 * There is at least one VA mapping this page. 
1232 */ 1233 oflags = npv->pv_flags; 1234 npv->pv_flags = flags = (oflags & ~clr_mask) | set_mask; 1235 1236 if ((flags ^ oflags) & PVF_WIRED) { 1237 if (flags & PVF_WIRED) 1238 ++pmap->pm_stats.wired_count; 1239 else 1240 --pmap->pm_stats.wired_count; 1241 } 1242 1243 return (oflags); 1244 } 1245 1246 /* Function to set the debug level of the pmap code */ 1247 #ifdef PMAP_DEBUG 1248 void 1249 pmap_debug(int level) 1250 { 1251 pmap_debug_level = level; 1252 dprintf("pmap_debug: level=%d\n", pmap_debug_level); 1253 } 1254 #endif /* PMAP_DEBUG */ 1255 1256 void 1257 pmap_pinit0(struct pmap *pmap) 1258 { 1259 PDEBUG(1, printf("pmap_pinit0: pmap = %08x\n", (u_int32_t) pmap)); 1260 1261 bcopy(kernel_pmap, pmap, sizeof(*pmap)); 1262 bzero(&pmap->pm_mtx, sizeof(pmap->pm_mtx)); 1263 PMAP_LOCK_INIT(pmap); 1264 TAILQ_INIT(&pmap->pm_pvchunk); 1265 } 1266 1267 /* 1268 * Initialize a vm_page's machine-dependent fields. 1269 */ 1270 void 1271 pmap_page_init(vm_page_t m) 1272 { 1273 1274 TAILQ_INIT(&m->md.pv_list); 1275 m->md.pv_memattr = VM_MEMATTR_DEFAULT; 1276 } 1277 1278 static vm_offset_t 1279 pmap_ptelist_alloc(vm_offset_t *head) 1280 { 1281 pt_entry_t *pte; 1282 vm_offset_t va; 1283 1284 va = *head; 1285 if (va == 0) 1286 return (va); /* Out of memory */ 1287 pte = vtopte(va); 1288 *head = *pte; 1289 if ((*head & L2_TYPE_MASK) != L2_TYPE_INV) 1290 panic("%s: va is not L2_TYPE_INV!", __func__); 1291 *pte = 0; 1292 return (va); 1293 } 1294 1295 static void 1296 pmap_ptelist_free(vm_offset_t *head, vm_offset_t va) 1297 { 1298 pt_entry_t *pte; 1299 1300 if ((va & L2_TYPE_MASK) != L2_TYPE_INV) 1301 panic("%s: freeing va that is not L2_TYPE INV!", __func__); 1302 pte = vtopte(va); 1303 *pte = *head; /* virtual! L2_TYPE is L2_TYPE_INV though */ 1304 *head = va; 1305 } 1306 1307 static void 1308 pmap_ptelist_init(vm_offset_t *head, void *base, int npages) 1309 { 1310 int i; 1311 vm_offset_t va; 1312 1313 *head = 0; 1314 for (i = npages - 1; i >= 0; i--) { 1315 va = (vm_offset_t)base + i * PAGE_SIZE; 1316 pmap_ptelist_free(head, va); 1317 } 1318 } 1319 1320 /* 1321 * Initialize the pmap module. 1322 * Called by vm_init, to initialize any structures that the pmap 1323 * system needs to map virtual memory. 1324 */ 1325 void 1326 pmap_init(void) 1327 { 1328 vm_size_t s; 1329 int i, pv_npg; 1330 1331 l2zone = uma_zcreate("L2 Table", L2_TABLE_SIZE_REAL, pmap_l2ptp_ctor, 1332 NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); 1333 l2table_zone = uma_zcreate("L2 Table", sizeof(struct l2_dtable), NULL, 1334 NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); 1335 1336 /* 1337 * Are large page mappings supported and enabled? 1338 */ 1339 TUNABLE_INT_FETCH("vm.pmap.sp_enabled", &sp_enabled); 1340 if (sp_enabled) { 1341 KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0, 1342 ("pmap_init: can't assign to pagesizes[1]")); 1343 pagesizes[1] = NBPDR; 1344 } 1345 1346 /* 1347 * Calculate the size of the pv head table for superpages. 1348 * Handle the possibility that "vm_phys_segs[...].end" is zero. 1349 */ 1350 pv_npg = trunc_1mpage(vm_phys_segs[vm_phys_nsegs - 1].end - 1351 PAGE_SIZE) / NBPDR + 1; 1352 1353 /* 1354 * Allocate memory for the pv head table for superpages. 1355 */ 1356 s = (vm_size_t)(pv_npg * sizeof(struct md_page)); 1357 s = round_page(s); 1358 pv_table = (struct md_page *)kmem_malloc(kernel_arena, s, 1359 M_WAITOK | M_ZERO); 1360 for (i = 0; i < pv_npg; i++) 1361 TAILQ_INIT(&pv_table[i].pv_list); 1362 1363 /* 1364 * Initialize the address space for the pv chunks. 
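 *
 * Rough sizing example for the calculations below (illustrative numbers
 * only): with the default shpgperproc of 200, maxproc = 1000 and
 * vm_cnt.v_page_count = 100000, pv_entry_max starts at
 * 200 * 1000 + 100000 = 300000 and is rounded up to a multiple of
 * _NPCPV (252), i.e. 300132.  pv_entry_high_water becomes 90% of that,
 * pv_maxchunks = 300132 / 252 = 1191, and the KVA reserved for chunks is
 * 1191 * PAGE_SIZE, a little under 5 MB.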
1365 */ 1366 1367 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); 1368 pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count; 1369 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); 1370 pv_entry_max = roundup(pv_entry_max, _NPCPV); 1371 pv_entry_high_water = 9 * (pv_entry_max / 10); 1372 1373 pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc); 1374 pv_chunkbase = (struct pv_chunk *)kva_alloc(PAGE_SIZE * pv_maxchunks); 1375 1376 if (pv_chunkbase == NULL) 1377 panic("pmap_init: not enough kvm for pv chunks"); 1378 1379 pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks); 1380 1381 /* 1382 * Now it is safe to enable pv_table recording. 1383 */ 1384 PDEBUG(1, printf("pmap_init: done!\n")); 1385 } 1386 1387 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0, 1388 "Max number of PV entries"); 1389 SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0, 1390 "Page share factor per proc"); 1391 1392 static SYSCTL_NODE(_vm_pmap, OID_AUTO, section, CTLFLAG_RD, 0, 1393 "1MB page mapping counters"); 1394 1395 static u_long pmap_section_demotions; 1396 SYSCTL_ULONG(_vm_pmap_section, OID_AUTO, demotions, CTLFLAG_RD, 1397 &pmap_section_demotions, 0, "1MB page demotions"); 1398 1399 static u_long pmap_section_mappings; 1400 SYSCTL_ULONG(_vm_pmap_section, OID_AUTO, mappings, CTLFLAG_RD, 1401 &pmap_section_mappings, 0, "1MB page mappings"); 1402 1403 static u_long pmap_section_p_failures; 1404 SYSCTL_ULONG(_vm_pmap_section, OID_AUTO, p_failures, CTLFLAG_RD, 1405 &pmap_section_p_failures, 0, "1MB page promotion failures"); 1406 1407 static u_long pmap_section_promotions; 1408 SYSCTL_ULONG(_vm_pmap_section, OID_AUTO, promotions, CTLFLAG_RD, 1409 &pmap_section_promotions, 0, "1MB page promotions"); 1410 1411 int 1412 pmap_fault_fixup(pmap_t pmap, vm_offset_t va, vm_prot_t ftype, int user) 1413 { 1414 struct l2_dtable *l2; 1415 struct l2_bucket *l2b; 1416 pd_entry_t *pl1pd, l1pd; 1417 pt_entry_t *ptep, pte; 1418 vm_paddr_t pa; 1419 u_int l1idx; 1420 int rv = 0; 1421 1422 l1idx = L1_IDX(va); 1423 rw_wlock(&pvh_global_lock); 1424 PMAP_LOCK(pmap); 1425 /* 1426 * Check and possibly fix-up L1 section mapping 1427 * only when superpage mappings are enabled to speed up. 1428 */ 1429 if (sp_enabled) { 1430 pl1pd = &pmap->pm_l1->l1_kva[l1idx]; 1431 l1pd = *pl1pd; 1432 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { 1433 /* Catch an access to the vectors section */ 1434 if (l1idx == L1_IDX(vector_page)) 1435 goto out; 1436 /* 1437 * Stay away from the kernel mappings. 1438 * None of them should fault from L1 entry. 1439 */ 1440 if (pmap == pmap_kernel()) 1441 goto out; 1442 /* 1443 * Catch a forbidden userland access 1444 */ 1445 if (user && !(l1pd & L1_S_PROT_U)) 1446 goto out; 1447 /* 1448 * Superpage is always either mapped read only 1449 * or it is modified and permitted to be written 1450 * by default. Therefore, process only reference 1451 * flag fault and demote page in case of write fault. 1452 */ 1453 if ((ftype & VM_PROT_WRITE) && !L1_S_WRITABLE(l1pd) && 1454 L1_S_REFERENCED(l1pd)) { 1455 (void)pmap_demote_section(pmap, va); 1456 goto out; 1457 } else if (!L1_S_REFERENCED(l1pd)) { 1458 /* Mark the page "referenced" */ 1459 *pl1pd = l1pd | L1_S_REF; 1460 PTE_SYNC(pl1pd); 1461 goto l1_section_out; 1462 } else 1463 goto out; 1464 } 1465 } 1466 /* 1467 * If there is no l2_dtable for this address, then the process 1468 * has no business accessing it. 1469 * 1470 * Note: This will catch userland processes trying to access 1471 * kernel addresses. 
1472 */ 1473 l2 = pmap->pm_l2[L2_IDX(l1idx)]; 1474 if (l2 == NULL) 1475 goto out; 1476 1477 /* 1478 * Likewise if there is no L2 descriptor table 1479 */ 1480 l2b = &l2->l2_bucket[L2_BUCKET(l1idx)]; 1481 if (l2b->l2b_kva == NULL) 1482 goto out; 1483 1484 /* 1485 * Check the PTE itself. 1486 */ 1487 ptep = &l2b->l2b_kva[l2pte_index(va)]; 1488 pte = *ptep; 1489 if (pte == 0) 1490 goto out; 1491 1492 /* 1493 * Catch a userland access to the vector page mapped at 0x0 1494 */ 1495 if (user && !(pte & L2_S_PROT_U)) 1496 goto out; 1497 if (va == vector_page) 1498 goto out; 1499 1500 pa = l2pte_pa(pte); 1501 CTR5(KTR_PMAP, "pmap_fault_fix: pmap:%p va:%x pte:0x%x ftype:%x user:%x", 1502 pmap, va, pte, ftype, user); 1503 if ((ftype & VM_PROT_WRITE) && !(L2_S_WRITABLE(pte)) && 1504 L2_S_REFERENCED(pte)) { 1505 /* 1506 * This looks like a good candidate for "page modified" 1507 * emulation... 1508 */ 1509 struct pv_entry *pv; 1510 struct vm_page *m; 1511 1512 /* Extract the physical address of the page */ 1513 if ((m = PHYS_TO_VM_PAGE(pa)) == NULL) { 1514 goto out; 1515 } 1516 /* Get the current flags for this page. */ 1517 1518 pv = pmap_find_pv(&m->md, pmap, va); 1519 if (pv == NULL) { 1520 goto out; 1521 } 1522 1523 /* 1524 * Do the flags say this page is writable? If not then it 1525 * is a genuine write fault. If yes then the write fault is 1526 * our fault as we did not reflect the write access in the 1527 * PTE. Now we know a write has occurred we can correct this 1528 * and also set the modified bit 1529 */ 1530 if ((pv->pv_flags & PVF_WRITE) == 0) { 1531 goto out; 1532 } 1533 1534 vm_page_dirty(m); 1535 1536 /* Re-enable write permissions for the page */ 1537 *ptep = (pte & ~L2_APX); 1538 PTE_SYNC(ptep); 1539 rv = 1; 1540 CTR1(KTR_PMAP, "pmap_fault_fix: new pte:0x%x", *ptep); 1541 } else if (!L2_S_REFERENCED(pte)) { 1542 /* 1543 * This looks like a good candidate for "page referenced" 1544 * emulation. 1545 */ 1546 struct pv_entry *pv; 1547 struct vm_page *m; 1548 1549 /* Extract the physical address of the page */ 1550 if ((m = PHYS_TO_VM_PAGE(pa)) == NULL) 1551 goto out; 1552 /* Get the current flags for this page. */ 1553 pv = pmap_find_pv(&m->md, pmap, va); 1554 if (pv == NULL) 1555 goto out; 1556 1557 vm_page_aflag_set(m, PGA_REFERENCED); 1558 1559 /* Mark the page "referenced" */ 1560 *ptep = pte | L2_S_REF; 1561 PTE_SYNC(ptep); 1562 rv = 1; 1563 CTR1(KTR_PMAP, "pmap_fault_fix: new pte:0x%x", *ptep); 1564 } 1565 1566 /* 1567 * We know there is a valid mapping here, so simply 1568 * fix up the L1 if necessary. 1569 */ 1570 pl1pd = &pmap->pm_l1->l1_kva[l1idx]; 1571 l1pd = l2b->l2b_phys | L1_C_DOM(pmap->pm_domain) | L1_C_PROTO; 1572 if (*pl1pd != l1pd) { 1573 *pl1pd = l1pd; 1574 PTE_SYNC(pl1pd); 1575 rv = 1; 1576 } 1577 1578 #ifdef DEBUG 1579 /* 1580 * If 'rv == 0' at this point, it generally indicates that there is a 1581 * stale TLB entry for the faulting address. This happens when two or 1582 * more processes are sharing an L1. Since we don't flush the TLB on 1583 * a context switch between such processes, we can take domain faults 1584 * for mappings which exist at the same VA in both processes. EVEN IF 1585 * WE'VE RECENTLY FIXED UP THE CORRESPONDING L1 in pmap_enter(), for 1586 * example. 1587 * 1588 * This is extremely likely to happen if pmap_enter() updated the L1 1589 * entry for a recently entered mapping. 
In this case, the TLB is 1590 * flushed for the new mapping, but there may still be TLB entries for 1591 * other mappings belonging to other processes in the 1MB range 1592 * covered by the L1 entry. 1593 * 1594 * Since 'rv == 0', we know that the L1 already contains the correct 1595 * value, so the fault must be due to a stale TLB entry. 1596 * 1597 * Since we always need to flush the TLB anyway in the case where we 1598 * fixed up the L1, or frobbed the L2 PTE, we effectively deal with 1599 * stale TLB entries dynamically. 1600 * 1601 * However, the above condition can ONLY happen if the current L1 is 1602 * being shared. If it happens when the L1 is unshared, it indicates 1603 * that other parts of the pmap are not doing their job WRT managing 1604 * the TLB. 1605 */ 1606 if (rv == 0 && pmap->pm_l1->l1_domain_use_count == 1) { 1607 printf("fixup: pmap %p, va 0x%08x, ftype %d - nothing to do!\n", 1608 pmap, va, ftype); 1609 printf("fixup: l2 %p, l2b %p, ptep %p, pl1pd %p\n", 1610 l2, l2b, ptep, pl1pd); 1611 printf("fixup: pte 0x%x, l1pd 0x%x, last code 0x%x\n", 1612 pte, l1pd, last_fault_code); 1613 #ifdef DDB 1614 Debugger(); 1615 #endif 1616 } 1617 #endif 1618 1619 l1_section_out: 1620 cpu_tlb_flushID_SE(va); 1621 cpu_cpwait(); 1622 1623 rv = 1; 1624 1625 out: 1626 rw_wunlock(&pvh_global_lock); 1627 PMAP_UNLOCK(pmap); 1628 return (rv); 1629 } 1630 1631 void 1632 pmap_postinit(void) 1633 { 1634 struct l2_bucket *l2b; 1635 struct l1_ttable *l1; 1636 pd_entry_t *pl1pt; 1637 pt_entry_t *ptep, pte; 1638 vm_offset_t va, eva; 1639 u_int loop, needed; 1640 1641 needed = (maxproc / PMAP_DOMAINS) + ((maxproc % PMAP_DOMAINS) ? 1 : 0); 1642 needed -= 1; 1643 l1 = malloc(sizeof(*l1) * needed, M_VMPMAP, M_WAITOK); 1644 1645 for (loop = 0; loop < needed; loop++, l1++) { 1646 /* Allocate a L1 page table */ 1647 va = (vm_offset_t)contigmalloc(L1_TABLE_SIZE, M_VMPMAP, 0, 0x0, 1648 0xffffffff, L1_TABLE_SIZE, 0); 1649 1650 if (va == 0) 1651 panic("Cannot allocate L1 KVM"); 1652 1653 eva = va + L1_TABLE_SIZE; 1654 pl1pt = (pd_entry_t *)va; 1655 1656 while (va < eva) { 1657 l2b = pmap_get_l2_bucket(pmap_kernel(), va); 1658 ptep = &l2b->l2b_kva[l2pte_index(va)]; 1659 pte = *ptep; 1660 pte = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt; 1661 *ptep = pte; 1662 PTE_SYNC(ptep); 1663 cpu_tlb_flushID_SE(va); 1664 cpu_cpwait(); 1665 va += PAGE_SIZE; 1666 } 1667 pmap_init_l1(l1, pl1pt); 1668 } 1669 #ifdef DEBUG 1670 printf("pmap_postinit: Allocated %d static L1 descriptor tables\n", 1671 needed); 1672 #endif 1673 } 1674 1675 /* 1676 * This is used to stuff certain critical values into the PCB where they 1677 * can be accessed quickly from cpu_switch() et al. 
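 *
 * Example of the DACR image built below (illustrative; DOMAIN_CLIENT is
 * the ARM "client" encoding, 01b, two bits per domain): for a pmap that
 * was handed pm_domain = 3,
 *
 *     pcb_dacr = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) |
 *                (DOMAIN_CLIENT << (3 * 2))
 *
 * grants client access (permission bits are checked) for the kernel's
 * domain and for domain 3, and leaves every other domain as "no access",
 * so stale section mappings belonging to a different pmap that shares
 * this L1 take a domain fault instead of being silently honoured.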
1678 */ 1679 void 1680 pmap_set_pcb_pagedir(pmap_t pmap, struct pcb *pcb) 1681 { 1682 struct l2_bucket *l2b; 1683 1684 pcb->pcb_pagedir = pmap->pm_l1->l1_physaddr; 1685 pcb->pcb_dacr = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | 1686 (DOMAIN_CLIENT << (pmap->pm_domain * 2)); 1687 1688 if (vector_page < KERNBASE) { 1689 pcb->pcb_pl1vec = &pmap->pm_l1->l1_kva[L1_IDX(vector_page)]; 1690 l2b = pmap_get_l2_bucket(pmap, vector_page); 1691 pcb->pcb_l1vec = l2b->l2b_phys | L1_C_PROTO | 1692 L1_C_DOM(pmap->pm_domain) | L1_C_DOM(PMAP_DOMAIN_KERNEL); 1693 } else 1694 pcb->pcb_pl1vec = NULL; 1695 } 1696 1697 void 1698 pmap_activate(struct thread *td) 1699 { 1700 pmap_t pmap; 1701 struct pcb *pcb; 1702 1703 pmap = vmspace_pmap(td->td_proc->p_vmspace); 1704 pcb = td->td_pcb; 1705 1706 critical_enter(); 1707 pmap_set_pcb_pagedir(pmap, pcb); 1708 1709 if (td == curthread) { 1710 u_int cur_dacr, cur_ttb; 1711 1712 __asm __volatile("mrc p15, 0, %0, c2, c0, 0" : "=r"(cur_ttb)); 1713 __asm __volatile("mrc p15, 0, %0, c3, c0, 0" : "=r"(cur_dacr)); 1714 1715 cur_ttb &= ~(L1_TABLE_SIZE - 1); 1716 1717 if (cur_ttb == (u_int)pcb->pcb_pagedir && 1718 cur_dacr == pcb->pcb_dacr) { 1719 /* 1720 * No need to switch address spaces. 1721 */ 1722 critical_exit(); 1723 return; 1724 } 1725 1726 1727 /* 1728 * We MUST, I repeat, MUST fix up the L1 entry corresponding 1729 * to 'vector_page' in the incoming L1 table before switching 1730 * to it otherwise subsequent interrupts/exceptions (including 1731 * domain faults!) will jump into hyperspace. 1732 */ 1733 if (pcb->pcb_pl1vec) { 1734 *pcb->pcb_pl1vec = pcb->pcb_l1vec; 1735 } 1736 1737 cpu_domains(pcb->pcb_dacr); 1738 cpu_setttb(pcb->pcb_pagedir); 1739 } 1740 critical_exit(); 1741 } 1742 1743 static int 1744 pmap_set_pt_cache_mode(pd_entry_t *kl1, vm_offset_t va) 1745 { 1746 pd_entry_t *pdep, pde; 1747 pt_entry_t *ptep, pte; 1748 vm_offset_t pa; 1749 int rv = 0; 1750 1751 /* 1752 * Make sure the descriptor itself has the correct cache mode 1753 */ 1754 pdep = &kl1[L1_IDX(va)]; 1755 pde = *pdep; 1756 1757 if (l1pte_section_p(pde)) { 1758 if ((pde & L1_S_CACHE_MASK) != pte_l1_s_cache_mode_pt) { 1759 *pdep = (pde & ~L1_S_CACHE_MASK) | 1760 pte_l1_s_cache_mode_pt; 1761 PTE_SYNC(pdep); 1762 rv = 1; 1763 } 1764 } else { 1765 pa = (vm_paddr_t)(pde & L1_C_ADDR_MASK); 1766 ptep = (pt_entry_t *)kernel_pt_lookup(pa); 1767 if (ptep == NULL) 1768 panic("pmap_bootstrap: No L2 for L2 @ va %p\n", ptep); 1769 1770 ptep = &ptep[l2pte_index(va)]; 1771 pte = *ptep; 1772 if ((pte & L2_S_CACHE_MASK) != pte_l2_s_cache_mode_pt) { 1773 *ptep = (pte & ~L2_S_CACHE_MASK) | 1774 pte_l2_s_cache_mode_pt; 1775 PTE_SYNC(ptep); 1776 rv = 1; 1777 } 1778 } 1779 1780 return (rv); 1781 } 1782 1783 static void 1784 pmap_alloc_specials(vm_offset_t *availp, int pages, vm_offset_t *vap, 1785 pt_entry_t **ptep) 1786 { 1787 vm_offset_t va = *availp; 1788 struct l2_bucket *l2b; 1789 1790 if (ptep) { 1791 l2b = pmap_get_l2_bucket(pmap_kernel(), va); 1792 if (l2b == NULL) 1793 panic("pmap_alloc_specials: no l2b for 0x%x", va); 1794 1795 *ptep = &l2b->l2b_kva[l2pte_index(va)]; 1796 } 1797 1798 *vap = va; 1799 *availp = va + (PAGE_SIZE * pages); 1800 } 1801 1802 /* 1803 * Bootstrap the system enough to run with virtual memory. 1804 * 1805 * On the arm this is called after mapping has already been enabled 1806 * and just syncs the pmap module with what has already been done. 
1807 * [We can't call it easily with mapping off since the kernel is not 1808 * mapped with PA == VA, hence we would have to relocate every address 1809 * from the linked base (virtual) address "KERNBASE" to the actual 1810 * (physical) address starting relative to 0] 1811 */ 1812 #define PMAP_STATIC_L2_SIZE 16 1813 1814 void 1815 pmap_bootstrap(vm_offset_t firstaddr, struct pv_addr *l1pt) 1816 { 1817 static struct l1_ttable static_l1; 1818 static struct l2_dtable static_l2[PMAP_STATIC_L2_SIZE]; 1819 struct l1_ttable *l1 = &static_l1; 1820 struct l2_dtable *l2; 1821 struct l2_bucket *l2b; 1822 struct czpages *czp; 1823 pd_entry_t pde; 1824 pd_entry_t *kernel_l1pt = (pd_entry_t *)l1pt->pv_va; 1825 pt_entry_t *ptep; 1826 vm_paddr_t pa; 1827 vm_offset_t va; 1828 vm_size_t size; 1829 int i, l1idx, l2idx, l2next = 0; 1830 1831 PDEBUG(1, printf("firstaddr = %08x, lastaddr = %08x\n", 1832 firstaddr, vm_max_kernel_address)); 1833 1834 virtual_avail = firstaddr; 1835 kernel_pmap->pm_l1 = l1; 1836 kernel_l1pa = l1pt->pv_pa; 1837 1838 /* 1839 * Scan the L1 translation table created by initarm() and create 1840 * the required metadata for all valid mappings found in it. 1841 */ 1842 for (l1idx = 0; l1idx < (L1_TABLE_SIZE / sizeof(pd_entry_t)); l1idx++) { 1843 pde = kernel_l1pt[l1idx]; 1844 1845 /* 1846 * We're only interested in Coarse mappings. 1847 * pmap_extract() can deal with section mappings without 1848 * recourse to checking L2 metadata. 1849 */ 1850 if ((pde & L1_TYPE_MASK) != L1_TYPE_C) 1851 continue; 1852 1853 /* 1854 * Lookup the KVA of this L2 descriptor table 1855 */ 1856 pa = (vm_paddr_t)(pde & L1_C_ADDR_MASK); 1857 ptep = (pt_entry_t *)kernel_pt_lookup(pa); 1858 1859 if (ptep == NULL) { 1860 panic("pmap_bootstrap: No L2 for va 0x%x, pa 0x%lx", 1861 (u_int)l1idx << L1_S_SHIFT, (long unsigned int)pa); 1862 } 1863 1864 /* 1865 * Fetch the associated L2 metadata structure. 1866 * Allocate a new one if necessary. 1867 */ 1868 if ((l2 = kernel_pmap->pm_l2[L2_IDX(l1idx)]) == NULL) { 1869 if (l2next == PMAP_STATIC_L2_SIZE) 1870 panic("pmap_bootstrap: out of static L2s"); 1871 kernel_pmap->pm_l2[L2_IDX(l1idx)] = l2 = 1872 &static_l2[l2next++]; 1873 } 1874 1875 /* 1876 * One more L1 slot tracked... 1877 */ 1878 l2->l2_occupancy++; 1879 1880 /* 1881 * Fill in the details of the L2 descriptor in the 1882 * appropriate bucket. 1883 */ 1884 l2b = &l2->l2_bucket[L2_BUCKET(l1idx)]; 1885 l2b->l2b_kva = ptep; 1886 l2b->l2b_phys = pa; 1887 l2b->l2b_l1idx = l1idx; 1888 1889 /* 1890 * Establish an initial occupancy count for this descriptor 1891 */ 1892 for (l2idx = 0; 1893 l2idx < (L2_TABLE_SIZE_REAL / sizeof(pt_entry_t)); 1894 l2idx++) { 1895 if ((ptep[l2idx] & L2_TYPE_MASK) != L2_TYPE_INV) { 1896 l2b->l2b_occupancy++; 1897 } 1898 } 1899 1900 /* 1901 * Make sure the descriptor itself has the correct cache mode. 1902 * If not, fix it, but whine about the problem. Port-meisters 1903 * should consider this a clue to fix up their initarm() 1904 * function. :) 1905 */ 1906 if (pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)ptep)) { 1907 printf("pmap_bootstrap: WARNING! wrong cache mode for " 1908 "L2 pte @ %p\n", ptep); 1909 } 1910 } 1911 1912 1913 /* 1914 * Ensure the primary (kernel) L1 has the correct cache mode for 1915 * a page table. Bitch if it is not correctly set. 1916 */ 1917 for (va = (vm_offset_t)kernel_l1pt; 1918 va < ((vm_offset_t)kernel_l1pt + L1_TABLE_SIZE); va += PAGE_SIZE) { 1919 if (pmap_set_pt_cache_mode(kernel_l1pt, va)) 1920 printf("pmap_bootstrap: WARNING! 
wrong cache mode for " 1921 "primary L1 @ 0x%x\n", va); 1922 } 1923 1924 cpu_dcache_wbinv_all(); 1925 cpu_l2cache_wbinv_all(); 1926 cpu_tlb_flushID(); 1927 cpu_cpwait(); 1928 1929 PMAP_LOCK_INIT(kernel_pmap); 1930 CPU_FILL(&kernel_pmap->pm_active); 1931 kernel_pmap->pm_domain = PMAP_DOMAIN_KERNEL; 1932 TAILQ_INIT(&kernel_pmap->pm_pvchunk); 1933 1934 /* 1935 * Initialize the global pv list lock. 1936 */ 1937 rw_init(&pvh_global_lock, "pmap pv global"); 1938 1939 /* 1940 * Reserve some special page table entries/VA space for temporary 1941 * mapping of pages that are being copied or zeroed. 1942 */ 1943 for (czp = cpu_czpages, i = 0; i < MAXCPU; ++i, ++czp) { 1944 mtx_init(&czp->lock, "czpages", NULL, MTX_DEF); 1945 pmap_alloc_specials(&virtual_avail, 1, &czp->srcva, &czp->srcptep); 1946 pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)czp->srcptep); 1947 pmap_alloc_specials(&virtual_avail, 1, &czp->dstva, &czp->dstptep); 1948 pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)czp->dstptep); 1949 } 1950 1951 size = ((vm_max_kernel_address - pmap_curmaxkvaddr) + L1_S_OFFSET) / 1952 L1_S_SIZE; 1953 pmap_alloc_specials(&virtual_avail, 1954 round_page(size * L2_TABLE_SIZE_REAL) / PAGE_SIZE, 1955 &pmap_kernel_l2ptp_kva, NULL); 1956 1957 size = (size + (L2_BUCKET_SIZE - 1)) / L2_BUCKET_SIZE; 1958 pmap_alloc_specials(&virtual_avail, 1959 round_page(size * sizeof(struct l2_dtable)) / PAGE_SIZE, 1960 &pmap_kernel_l2dtable_kva, NULL); 1961 1962 pmap_alloc_specials(&virtual_avail, 1963 1, (vm_offset_t*)&_tmppt, NULL); 1964 pmap_alloc_specials(&virtual_avail, 1965 MAXDUMPPGS, (vm_offset_t *)&crashdumpmap, NULL); 1966 SLIST_INIT(&l1_list); 1967 TAILQ_INIT(&l1_lru_list); 1968 mtx_init(&l1_lru_lock, "l1 list lock", NULL, MTX_DEF); 1969 pmap_init_l1(l1, kernel_l1pt); 1970 cpu_dcache_wbinv_all(); 1971 cpu_l2cache_wbinv_all(); 1972 cpu_tlb_flushID(); 1973 cpu_cpwait(); 1974 1975 virtual_avail = round_page(virtual_avail); 1976 virtual_end = vm_max_kernel_address; 1977 kernel_vm_end = pmap_curmaxkvaddr; 1978 1979 pmap_set_pcb_pagedir(kernel_pmap, thread0.td_pcb); 1980 } 1981 1982 /*************************************************** 1983 * Pmap allocation/deallocation routines. 1984 ***************************************************/ 1985 1986 /* 1987 * Release any resources held by the given physical map. 1988 * Called when a pmap initialized by pmap_pinit is being released. 1989 * Should only be called if the map contains no valid mappings. 1990 */ 1991 void 1992 pmap_release(pmap_t pmap) 1993 { 1994 struct pcb *pcb; 1995 1996 cpu_tlb_flushID(); 1997 cpu_cpwait(); 1998 if (vector_page < KERNBASE) { 1999 struct pcb *curpcb = PCPU_GET(curpcb); 2000 pcb = thread0.td_pcb; 2001 if (pmap_is_current(pmap)) { 2002 /* 2003 * Frob the L1 entry corresponding to the vector 2004 * page so that it contains the kernel pmap's domain 2005 * number. This will ensure pmap_remove() does not 2006 * pull the current vector page out from under us. 2007 */ 2008 critical_enter(); 2009 *pcb->pcb_pl1vec = pcb->pcb_l1vec; 2010 cpu_domains(pcb->pcb_dacr); 2011 cpu_setttb(pcb->pcb_pagedir); 2012 critical_exit(); 2013 } 2014 pmap_remove(pmap, vector_page, vector_page + PAGE_SIZE); 2015 /* 2016 * Make sure cpu_switch(), et al, DTRT. This is safe to do 2017 * since this process has no remaining mappings of its own. 
2018 */ 2019 curpcb->pcb_pl1vec = pcb->pcb_pl1vec; 2020 curpcb->pcb_l1vec = pcb->pcb_l1vec; 2021 curpcb->pcb_dacr = pcb->pcb_dacr; 2022 curpcb->pcb_pagedir = pcb->pcb_pagedir; 2023 2024 } 2025 pmap_free_l1(pmap); 2026 2027 dprintf("pmap_release()\n"); 2028 } 2029 2030 2031 2032 /* 2033 * Helper function for pmap_grow_l2_bucket() 2034 */ 2035 static __inline int 2036 pmap_grow_map(vm_offset_t va, pt_entry_t cache_mode, vm_paddr_t *pap) 2037 { 2038 struct l2_bucket *l2b; 2039 pt_entry_t *ptep; 2040 vm_paddr_t pa; 2041 struct vm_page *m; 2042 2043 m = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); 2044 if (m == NULL) 2045 return (1); 2046 pa = VM_PAGE_TO_PHYS(m); 2047 2048 if (pap) 2049 *pap = pa; 2050 2051 l2b = pmap_get_l2_bucket(pmap_kernel(), va); 2052 2053 ptep = &l2b->l2b_kva[l2pte_index(va)]; 2054 *ptep = L2_S_PROTO | pa | cache_mode | L2_S_REF; 2055 pmap_set_prot(ptep, VM_PROT_READ | VM_PROT_WRITE, 0); 2056 PTE_SYNC(ptep); 2057 cpu_tlb_flushD_SE(va); 2058 cpu_cpwait(); 2059 2060 return (0); 2061 } 2062 2063 /* 2064 * This is the same as pmap_alloc_l2_bucket(), except that it is only 2065 * used by pmap_growkernel(). 2066 */ 2067 static __inline struct l2_bucket * 2068 pmap_grow_l2_bucket(pmap_t pmap, vm_offset_t va) 2069 { 2070 struct l2_dtable *l2; 2071 struct l2_bucket *l2b; 2072 struct l1_ttable *l1; 2073 pd_entry_t *pl1pd; 2074 u_short l1idx; 2075 vm_offset_t nva; 2076 2077 l1idx = L1_IDX(va); 2078 2079 if ((l2 = pmap->pm_l2[L2_IDX(l1idx)]) == NULL) { 2080 /* 2081 * No mapping at this address, as there is 2082 * no entry in the L1 table. 2083 * Need to allocate a new l2_dtable. 2084 */ 2085 nva = pmap_kernel_l2dtable_kva; 2086 if ((nva & PAGE_MASK) == 0) { 2087 /* 2088 * Need to allocate a backing page 2089 */ 2090 if (pmap_grow_map(nva, pte_l2_s_cache_mode, NULL)) 2091 return (NULL); 2092 } 2093 2094 l2 = (struct l2_dtable *)nva; 2095 nva += sizeof(struct l2_dtable); 2096 2097 if ((nva & PAGE_MASK) < (pmap_kernel_l2dtable_kva & 2098 PAGE_MASK)) { 2099 /* 2100 * The new l2_dtable straddles a page boundary. 2101 * Map in another page to cover it. 2102 */ 2103 if (pmap_grow_map(nva, pte_l2_s_cache_mode, NULL)) 2104 return (NULL); 2105 } 2106 2107 pmap_kernel_l2dtable_kva = nva; 2108 2109 /* 2110 * Link it into the parent pmap 2111 */ 2112 pmap->pm_l2[L2_IDX(l1idx)] = l2; 2113 memset(l2, 0, sizeof(*l2)); 2114 } 2115 2116 l2b = &l2->l2_bucket[L2_BUCKET(l1idx)]; 2117 2118 /* 2119 * Fetch pointer to the L2 page table associated with the address. 2120 */ 2121 if (l2b->l2b_kva == NULL) { 2122 pt_entry_t *ptep; 2123 2124 /* 2125 * No L2 page table has been allocated. Chances are, this 2126 * is because we just allocated the l2_dtable, above. 
2127 */ 2128 nva = pmap_kernel_l2ptp_kva; 2129 ptep = (pt_entry_t *)nva; 2130 if ((nva & PAGE_MASK) == 0) { 2131 /* 2132 * Need to allocate a backing page 2133 */ 2134 if (pmap_grow_map(nva, pte_l2_s_cache_mode_pt, 2135 &pmap_kernel_l2ptp_phys)) 2136 return (NULL); 2137 } 2138 memset(ptep, 0, L2_TABLE_SIZE_REAL); 2139 l2->l2_occupancy++; 2140 l2b->l2b_kva = ptep; 2141 l2b->l2b_l1idx = l1idx; 2142 l2b->l2b_phys = pmap_kernel_l2ptp_phys; 2143 2144 pmap_kernel_l2ptp_kva += L2_TABLE_SIZE_REAL; 2145 pmap_kernel_l2ptp_phys += L2_TABLE_SIZE_REAL; 2146 } 2147 2148 /* Distribute new L1 entry to all other L1s */ 2149 SLIST_FOREACH(l1, &l1_list, l1_link) { 2150 pl1pd = &l1->l1_kva[L1_IDX(va)]; 2151 *pl1pd = l2b->l2b_phys | L1_C_DOM(PMAP_DOMAIN_KERNEL) | 2152 L1_C_PROTO; 2153 PTE_SYNC(pl1pd); 2154 } 2155 cpu_tlb_flushID_SE(va); 2156 cpu_cpwait(); 2157 2158 return (l2b); 2159 } 2160 2161 2162 /* 2163 * grow the number of kernel page table entries, if needed 2164 */ 2165 void 2166 pmap_growkernel(vm_offset_t addr) 2167 { 2168 pmap_t kpmap = pmap_kernel(); 2169 2170 if (addr <= pmap_curmaxkvaddr) 2171 return; /* we are OK */ 2172 2173 /* 2174 * whoops! we need to add kernel PTPs 2175 */ 2176 2177 /* Map 1MB at a time */ 2178 for (; pmap_curmaxkvaddr < addr; pmap_curmaxkvaddr += L1_S_SIZE) 2179 pmap_grow_l2_bucket(kpmap, pmap_curmaxkvaddr); 2180 2181 kernel_vm_end = pmap_curmaxkvaddr; 2182 } 2183 2184 /* 2185 * Returns TRUE if the given page is mapped individually or as part of 2186 * a 1MB section. Otherwise, returns FALSE. 2187 */ 2188 boolean_t 2189 pmap_page_is_mapped(vm_page_t m) 2190 { 2191 boolean_t rv; 2192 2193 if ((m->oflags & VPO_UNMANAGED) != 0) 2194 return (FALSE); 2195 rw_wlock(&pvh_global_lock); 2196 rv = !TAILQ_EMPTY(&m->md.pv_list) || 2197 ((m->flags & PG_FICTITIOUS) == 0 && 2198 !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list)); 2199 rw_wunlock(&pvh_global_lock); 2200 return (rv); 2201 } 2202 2203 /* 2204 * Remove all pages from specified address space 2205 * this aids process exit speeds. Also, this code 2206 * is special cased for current process only, but 2207 * can have the more generic (and slightly slower) 2208 * mode enabled. This is much faster than pmap_remove 2209 * in the case of running down an entire address space. 2210 */ 2211 void 2212 pmap_remove_pages(pmap_t pmap) 2213 { 2214 struct pv_entry *pv; 2215 struct l2_bucket *l2b = NULL; 2216 struct pv_chunk *pc, *npc; 2217 struct md_page *pvh; 2218 pd_entry_t *pl1pd, l1pd; 2219 pt_entry_t *ptep; 2220 vm_page_t m, mt; 2221 vm_offset_t va; 2222 uint32_t inuse, bitmask; 2223 int allfree, bit, field, idx; 2224 2225 rw_wlock(&pvh_global_lock); 2226 PMAP_LOCK(pmap); 2227 2228 TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { 2229 allfree = 1; 2230 for (field = 0; field < _NPCM; field++) { 2231 inuse = ~pc->pc_map[field] & pc_freemask[field]; 2232 while (inuse != 0) { 2233 bit = ffs(inuse) - 1; 2234 bitmask = 1ul << bit; 2235 idx = field * sizeof(inuse) * NBBY + bit; 2236 pv = &pc->pc_pventry[idx]; 2237 va = pv->pv_va; 2238 inuse &= ~bitmask; 2239 if (pv->pv_flags & PVF_WIRED) { 2240 /* Cannot remove wired pages now. 
*/ 2241 allfree = 0; 2242 continue; 2243 } 2244 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; 2245 l1pd = *pl1pd; 2246 l2b = pmap_get_l2_bucket(pmap, va); 2247 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { 2248 pvh = pa_to_pvh(l1pd & L1_S_FRAME); 2249 TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); 2250 if (TAILQ_EMPTY(&pvh->pv_list)) { 2251 m = PHYS_TO_VM_PAGE(l1pd & L1_S_FRAME); 2252 KASSERT((vm_offset_t)m >= KERNBASE, 2253 ("Trying to access non-existent page " 2254 "va %x l1pd %x", trunc_1mpage(va), l1pd)); 2255 for (mt = m; mt < &m[L2_PTE_NUM_TOTAL]; mt++) { 2256 if (TAILQ_EMPTY(&mt->md.pv_list)) 2257 vm_page_aflag_clear(mt, PGA_WRITEABLE); 2258 } 2259 } 2260 if (l2b != NULL) { 2261 KASSERT(l2b->l2b_occupancy == L2_PTE_NUM_TOTAL, 2262 ("pmap_remove_pages: l2_bucket occupancy error")); 2263 pmap_free_l2_bucket(pmap, l2b, L2_PTE_NUM_TOTAL); 2264 } 2265 pmap->pm_stats.resident_count -= L2_PTE_NUM_TOTAL; 2266 *pl1pd = 0; 2267 PTE_SYNC(pl1pd); 2268 } else { 2269 KASSERT(l2b != NULL, 2270 ("No L2 bucket in pmap_remove_pages")); 2271 ptep = &l2b->l2b_kva[l2pte_index(va)]; 2272 m = PHYS_TO_VM_PAGE(l2pte_pa(*ptep)); 2273 KASSERT((vm_offset_t)m >= KERNBASE, 2274 ("Trying to access non-existent page " 2275 "va %x pte %x", va, *ptep)); 2276 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2277 if (TAILQ_EMPTY(&m->md.pv_list) && 2278 (m->flags & PG_FICTITIOUS) == 0) { 2279 pvh = pa_to_pvh(l2pte_pa(*ptep)); 2280 if (TAILQ_EMPTY(&pvh->pv_list)) 2281 vm_page_aflag_clear(m, PGA_WRITEABLE); 2282 } 2283 *ptep = 0; 2284 PTE_SYNC(ptep); 2285 pmap_free_l2_bucket(pmap, l2b, 1); 2286 pmap->pm_stats.resident_count--; 2287 } 2288 2289 /* Mark free */ 2290 PV_STAT(pv_entry_frees++); 2291 PV_STAT(pv_entry_spare++); 2292 pv_entry_count--; 2293 pc->pc_map[field] |= bitmask; 2294 } 2295 } 2296 if (allfree) { 2297 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2298 pmap_free_pv_chunk(pc); 2299 } 2300 2301 } 2302 2303 rw_wunlock(&pvh_global_lock); 2304 cpu_tlb_flushID(); 2305 cpu_cpwait(); 2306 PMAP_UNLOCK(pmap); 2307 } 2308 2309 2310 /*************************************************** 2311 * Low level mapping routines..... 2312 ***************************************************/ 2313 2314 #ifdef ARM_HAVE_SUPERSECTIONS 2315 /* Map a super section into the KVA. */ 2316 2317 void 2318 pmap_kenter_supersection(vm_offset_t va, uint64_t pa, int flags) 2319 { 2320 pd_entry_t pd = L1_S_PROTO | L1_S_SUPERSEC | (pa & L1_SUP_FRAME) | 2321 (((pa >> 32) & 0xf) << 20) | L1_S_PROT(PTE_KERNEL, 2322 VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE) | 2323 L1_S_DOM(PMAP_DOMAIN_KERNEL); 2324 struct l1_ttable *l1; 2325 vm_offset_t va0, va_end; 2326 2327 KASSERT(((va | pa) & L1_SUP_OFFSET) == 0, 2328 ("Not a valid super section mapping")); 2329 if (flags & SECTION_CACHE) 2330 pd |= pte_l1_s_cache_mode; 2331 else if (flags & SECTION_PT) 2332 pd |= pte_l1_s_cache_mode_pt; 2333 2334 va0 = va & L1_SUP_FRAME; 2335 va_end = va + L1_SUP_SIZE; 2336 SLIST_FOREACH(l1, &l1_list, l1_link) { 2337 va = va0; 2338 for (; va < va_end; va += L1_S_SIZE) { 2339 l1->l1_kva[L1_IDX(va)] = pd; 2340 PTE_SYNC(&l1->l1_kva[L1_IDX(va)]); 2341 } 2342 } 2343 } 2344 #endif 2345 2346 /* Map a section into the KVA. 
*/ 2347 2348 void 2349 pmap_kenter_section(vm_offset_t va, vm_offset_t pa, int flags) 2350 { 2351 pd_entry_t pd = L1_S_PROTO | pa | L1_S_PROT(PTE_KERNEL, 2352 VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE) | L1_S_REF | 2353 L1_S_DOM(PMAP_DOMAIN_KERNEL); 2354 struct l1_ttable *l1; 2355 2356 KASSERT(((va | pa) & L1_S_OFFSET) == 0, 2357 ("Not a valid section mapping")); 2358 if (flags & SECTION_CACHE) 2359 pd |= pte_l1_s_cache_mode; 2360 else if (flags & SECTION_PT) 2361 pd |= pte_l1_s_cache_mode_pt; 2362 2363 SLIST_FOREACH(l1, &l1_list, l1_link) { 2364 l1->l1_kva[L1_IDX(va)] = pd; 2365 PTE_SYNC(&l1->l1_kva[L1_IDX(va)]); 2366 } 2367 cpu_tlb_flushID_SE(va); 2368 cpu_cpwait(); 2369 } 2370 2371 /* 2372 * Make a temporary mapping for a physical address. This is only intended 2373 * to be used for panic dumps. 2374 */ 2375 void * 2376 pmap_kenter_temporary(vm_paddr_t pa, int i) 2377 { 2378 vm_offset_t va; 2379 2380 va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); 2381 pmap_kenter(va, pa); 2382 return ((void *)crashdumpmap); 2383 } 2384 2385 /* 2386 * add a wired page to the kva 2387 * note that in order for the mapping to take effect -- you 2388 * should do a invltlb after doing the pmap_kenter... 2389 */ 2390 static PMAP_INLINE void 2391 pmap_kenter_internal(vm_offset_t va, vm_offset_t pa, int flags) 2392 { 2393 struct l2_bucket *l2b; 2394 pt_entry_t *ptep; 2395 pt_entry_t opte; 2396 2397 PDEBUG(1, printf("pmap_kenter: va = %08x, pa = %08x\n", 2398 (uint32_t) va, (uint32_t) pa)); 2399 2400 2401 l2b = pmap_get_l2_bucket(pmap_kernel(), va); 2402 if (l2b == NULL) 2403 l2b = pmap_grow_l2_bucket(pmap_kernel(), va); 2404 KASSERT(l2b != NULL, ("No L2 Bucket")); 2405 2406 ptep = &l2b->l2b_kva[l2pte_index(va)]; 2407 opte = *ptep; 2408 2409 if (flags & KENTER_CACHE) 2410 *ptep = L2_S_PROTO | l2s_mem_types[PTE_CACHE] | pa | L2_S_REF; 2411 else if (flags & KENTER_DEVICE) 2412 *ptep = L2_S_PROTO | l2s_mem_types[PTE_DEVICE] | pa | L2_S_REF; 2413 else 2414 *ptep = L2_S_PROTO | l2s_mem_types[PTE_NOCACHE] | pa | L2_S_REF; 2415 2416 if (flags & KENTER_CACHE) { 2417 pmap_set_prot(ptep, VM_PROT_READ | VM_PROT_WRITE, 2418 flags & KENTER_USER); 2419 } else { 2420 pmap_set_prot(ptep, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE, 2421 0); 2422 } 2423 2424 PTE_SYNC(ptep); 2425 if (l2pte_valid(opte)) { 2426 if (L2_S_EXECUTABLE(opte) || L2_S_EXECUTABLE(*ptep)) 2427 cpu_tlb_flushID_SE(va); 2428 else 2429 cpu_tlb_flushD_SE(va); 2430 } else { 2431 if (opte == 0) 2432 l2b->l2b_occupancy++; 2433 } 2434 cpu_cpwait(); 2435 2436 PDEBUG(1, printf("pmap_kenter: pte = %08x, opte = %08x, npte = %08x\n", 2437 (uint32_t) ptep, opte, *ptep)); 2438 } 2439 2440 void 2441 pmap_kenter(vm_offset_t va, vm_paddr_t pa) 2442 { 2443 pmap_kenter_internal(va, pa, KENTER_CACHE); 2444 } 2445 2446 void 2447 pmap_kenter_nocache(vm_offset_t va, vm_paddr_t pa) 2448 { 2449 2450 pmap_kenter_internal(va, pa, 0); 2451 } 2452 2453 void 2454 pmap_kenter_device(vm_offset_t va, vm_size_t size, vm_paddr_t pa) 2455 { 2456 vm_offset_t sva; 2457 2458 KASSERT((size & PAGE_MASK) == 0, 2459 ("%s: device mapping not page-sized", __func__)); 2460 2461 sva = va; 2462 while (size != 0) { 2463 pmap_kenter_internal(va, pa, KENTER_DEVICE); 2464 va += PAGE_SIZE; 2465 pa += PAGE_SIZE; 2466 size -= PAGE_SIZE; 2467 } 2468 } 2469 2470 void 2471 pmap_kremove_device(vm_offset_t va, vm_size_t size) 2472 { 2473 vm_offset_t sva; 2474 2475 KASSERT((size & PAGE_MASK) == 0, 2476 ("%s: device mapping not page-sized", __func__)); 2477 2478 sva = va; 2479 while (size != 0) { 2480 
pmap_kremove(va); 2481 va += PAGE_SIZE; 2482 size -= PAGE_SIZE; 2483 } 2484 } 2485 2486 void 2487 pmap_kenter_user(vm_offset_t va, vm_paddr_t pa) 2488 { 2489 2490 pmap_kenter_internal(va, pa, KENTER_CACHE|KENTER_USER); 2491 /* 2492 * Call pmap_fault_fixup now, to make sure we'll have no exception 2493 * at the first use of the new address, or bad things will happen, 2494 * as we use one of these addresses in the exception handlers. 2495 */ 2496 pmap_fault_fixup(pmap_kernel(), va, VM_PROT_READ|VM_PROT_WRITE, 1); 2497 } 2498 2499 vm_paddr_t 2500 pmap_kextract(vm_offset_t va) 2501 { 2502 2503 if (kernel_vm_end == 0) 2504 return (0); 2505 return (pmap_extract_locked(kernel_pmap, va)); 2506 } 2507 2508 /* 2509 * remove a page from the kernel pagetables 2510 */ 2511 void 2512 pmap_kremove(vm_offset_t va) 2513 { 2514 struct l2_bucket *l2b; 2515 pt_entry_t *ptep, opte; 2516 2517 l2b = pmap_get_l2_bucket(pmap_kernel(), va); 2518 if (!l2b) 2519 return; 2520 KASSERT(l2b != NULL, ("No L2 Bucket")); 2521 ptep = &l2b->l2b_kva[l2pte_index(va)]; 2522 opte = *ptep; 2523 if (l2pte_valid(opte)) { 2524 va = va & ~PAGE_MASK; 2525 *ptep = 0; 2526 PTE_SYNC(ptep); 2527 if (L2_S_EXECUTABLE(opte)) 2528 cpu_tlb_flushID_SE(va); 2529 else 2530 cpu_tlb_flushD_SE(va); 2531 cpu_cpwait(); 2532 } 2533 } 2534 2535 2536 /* 2537 * Used to map a range of physical addresses into kernel 2538 * virtual address space. 2539 * 2540 * The value passed in '*virt' is a suggested virtual address for 2541 * the mapping. Architectures which can support a direct-mapped 2542 * physical to virtual region can return the appropriate address 2543 * within that region, leaving '*virt' unchanged. Other 2544 * architectures should map the pages starting at '*virt' and 2545 * update '*virt' with the first usable address after the mapped 2546 * region. 2547 */ 2548 vm_offset_t 2549 pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) 2550 { 2551 vm_offset_t sva = *virt; 2552 vm_offset_t va = sva; 2553 2554 PDEBUG(1, printf("pmap_map: virt = %08x, start = %08x, end = %08x, " 2555 "prot = %d\n", (uint32_t) *virt, (uint32_t) start, (uint32_t) end, 2556 prot)); 2557 2558 while (start < end) { 2559 pmap_kenter(va, start); 2560 va += PAGE_SIZE; 2561 start += PAGE_SIZE; 2562 } 2563 *virt = va; 2564 return (sva); 2565 } 2566 2567 /* 2568 * Add a list of wired pages to the kva 2569 * this routine is only used for temporary 2570 * kernel mappings that do not need to have 2571 * page modification or references recorded. 2572 * Note that old mappings are simply written 2573 * over. The page *must* be wired. 2574 */ 2575 void 2576 pmap_qenter(vm_offset_t va, vm_page_t *m, int count) 2577 { 2578 int i; 2579 2580 for (i = 0; i < count; i++) { 2581 pmap_kenter_internal(va, VM_PAGE_TO_PHYS(m[i]), 2582 KENTER_CACHE); 2583 va += PAGE_SIZE; 2584 } 2585 } 2586 2587 2588 /* 2589 * this routine jerks page mappings from the 2590 * kernel -- it is meant only for temporary mappings. 2591 */ 2592 void 2593 pmap_qremove(vm_offset_t va, int count) 2594 { 2595 int i; 2596 2597 for (i = 0; i < count; i++) { 2598 if (vtophys(va)) 2599 pmap_kremove(va); 2600 2601 va += PAGE_SIZE; 2602 } 2603 } 2604 2605 2606 /* 2607 * pmap_object_init_pt preloads the ptes for a given object 2608 * into the specified pmap. This eliminates the blast of soft 2609 * faults on process startup and immediately after an mmap. 
 */
void
pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
    vm_pindex_t pindex, vm_size_t size)
{

	VM_OBJECT_ASSERT_WLOCKED(object);
	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
	    ("pmap_object_init_pt: non-device object"));
}


/*
 * pmap_is_prefaultable:
 *
 *	Return whether or not the specified virtual address is eligible
 *	for prefault.
 */
boolean_t
pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
{
	pd_entry_t *pdep;
	pt_entry_t *ptep;

	if (!pmap_get_pde_pte(pmap, addr, &pdep, &ptep))
		return (FALSE);
	KASSERT((pdep != NULL && (l1pte_section_p(*pdep) || ptep != NULL)),
	    ("Valid mapping but no pte ?"));
	if (*pdep != 0 && !l1pte_section_p(*pdep))
		if (*ptep == 0)
			return (TRUE);
	return (FALSE);
}

/*
 * Fetch pointers to the PDE/PTE for the given pmap/VA pair.
 * Returns TRUE if the mapping exists, else FALSE.
 *
 * NOTE: This function is only used by a couple of arm-specific modules.
 * It is not safe to take any pmap locks here, since we could be right
 * in the middle of debugging the pmap anyway...
 *
 * It is possible for this routine to return FALSE even though a valid
 * mapping does exist. This is because we don't lock, so the metadata
 * state may be inconsistent.
 *
 * NOTE: We can return a NULL *ptp in the case where the L1 pde is
 * a "section" mapping.
 */
boolean_t
pmap_get_pde_pte(pmap_t pmap, vm_offset_t va, pd_entry_t **pdp,
    pt_entry_t **ptp)
{
	struct l2_dtable *l2;
	pd_entry_t *pl1pd, l1pd;
	pt_entry_t *ptep;
	u_short l1idx;

	if (pmap->pm_l1 == NULL)
		return (FALSE);

	l1idx = L1_IDX(va);
	*pdp = pl1pd = &pmap->pm_l1->l1_kva[l1idx];
	l1pd = *pl1pd;

	if (l1pte_section_p(l1pd)) {
		*ptp = NULL;
		return (TRUE);
	}

	if (pmap->pm_l2 == NULL)
		return (FALSE);

	l2 = pmap->pm_l2[L2_IDX(l1idx)];

	if (l2 == NULL ||
	    (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) {
		return (FALSE);
	}

	*ptp = &ptep[l2pte_index(va)];
	return (TRUE);
}

/*
 * Routine:	pmap_remove_all
 * Function:
 *		Removes this physical page from
 *		all physical maps in which it resides.
 *		Reflects back modify bits to the pager.
 *
 * Notes:
 *		Original versions of this routine were very
 *		inefficient because they iteratively called
 *		pmap_remove (slow...)
2705 */ 2706 void 2707 pmap_remove_all(vm_page_t m) 2708 { 2709 struct md_page *pvh; 2710 pv_entry_t pv; 2711 pmap_t pmap; 2712 pt_entry_t *ptep; 2713 struct l2_bucket *l2b; 2714 boolean_t flush = FALSE; 2715 pmap_t curpmap; 2716 u_int is_exec = 0; 2717 2718 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2719 ("pmap_remove_all: page %p is not managed", m)); 2720 rw_wlock(&pvh_global_lock); 2721 if ((m->flags & PG_FICTITIOUS) != 0) 2722 goto small_mappings; 2723 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 2724 while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { 2725 pmap = PV_PMAP(pv); 2726 PMAP_LOCK(pmap); 2727 pd_entry_t *pl1pd; 2728 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(pv->pv_va)]; 2729 KASSERT((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO, 2730 ("pmap_remove_all: valid section mapping expected")); 2731 (void)pmap_demote_section(pmap, pv->pv_va); 2732 PMAP_UNLOCK(pmap); 2733 } 2734 small_mappings: 2735 curpmap = vmspace_pmap(curproc->p_vmspace); 2736 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 2737 pmap = PV_PMAP(pv); 2738 if (flush == FALSE && (pmap == curpmap || 2739 pmap == pmap_kernel())) 2740 flush = TRUE; 2741 2742 PMAP_LOCK(pmap); 2743 l2b = pmap_get_l2_bucket(pmap, pv->pv_va); 2744 KASSERT(l2b != NULL, ("No l2 bucket")); 2745 ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)]; 2746 is_exec |= PTE_BEEN_EXECD(*ptep); 2747 *ptep = 0; 2748 if (pmap_is_current(pmap)) 2749 PTE_SYNC(ptep); 2750 pmap_free_l2_bucket(pmap, l2b, 1); 2751 pmap->pm_stats.resident_count--; 2752 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2753 if (pv->pv_flags & PVF_WIRED) 2754 pmap->pm_stats.wired_count--; 2755 pmap_free_pv_entry(pmap, pv); 2756 PMAP_UNLOCK(pmap); 2757 } 2758 2759 if (flush) { 2760 if (is_exec) 2761 cpu_tlb_flushID(); 2762 else 2763 cpu_tlb_flushD(); 2764 cpu_cpwait(); 2765 } 2766 vm_page_aflag_clear(m, PGA_WRITEABLE); 2767 rw_wunlock(&pvh_global_lock); 2768 } 2769 2770 int 2771 pmap_change_attr(vm_offset_t sva, vm_size_t len, int mode) 2772 { 2773 vm_offset_t base, offset, tmpva; 2774 vm_size_t size; 2775 struct l2_bucket *l2b; 2776 pt_entry_t *ptep, pte; 2777 vm_offset_t next_bucket; 2778 2779 PMAP_LOCK(kernel_pmap); 2780 2781 base = trunc_page(sva); 2782 offset = sva & PAGE_MASK; 2783 size = roundup(offset + len, PAGE_SIZE); 2784 2785 for (tmpva = base; tmpva < base + size; ) { 2786 next_bucket = L2_NEXT_BUCKET(tmpva); 2787 if (next_bucket > base + size) 2788 next_bucket = base + size; 2789 2790 l2b = pmap_get_l2_bucket(kernel_pmap, tmpva); 2791 if (l2b == NULL) { 2792 tmpva = next_bucket; 2793 continue; 2794 } 2795 2796 ptep = &l2b->l2b_kva[l2pte_index(tmpva)]; 2797 2798 if (*ptep == 0) { 2799 PMAP_UNLOCK(kernel_pmap); 2800 return(EINVAL); 2801 } 2802 2803 pte = *ptep &~ L2_S_CACHE_MASK; 2804 cpu_idcache_wbinv_range(tmpva, PAGE_SIZE); 2805 pmap_l2cache_wbinv_range(tmpva, pte & L2_S_FRAME, PAGE_SIZE); 2806 *ptep = pte; 2807 cpu_tlb_flushID_SE(tmpva); 2808 cpu_cpwait(); 2809 2810 dprintf("%s: for va:%x ptep:%x pte:%x\n", 2811 __func__, tmpva, (uint32_t)ptep, pte); 2812 tmpva += PAGE_SIZE; 2813 } 2814 2815 PMAP_UNLOCK(kernel_pmap); 2816 2817 return (0); 2818 } 2819 2820 /* 2821 * Set the physical protection on the 2822 * specified range of this map as requested. 
2823 */ 2824 void 2825 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 2826 { 2827 struct l2_bucket *l2b; 2828 struct md_page *pvh; 2829 struct pv_entry *pve; 2830 pd_entry_t *pl1pd, l1pd; 2831 pt_entry_t *ptep, pte; 2832 vm_offset_t next_bucket; 2833 u_int is_exec, is_refd; 2834 int flush; 2835 2836 if ((prot & VM_PROT_READ) == 0) { 2837 pmap_remove(pmap, sva, eva); 2838 return; 2839 } 2840 2841 if (prot & VM_PROT_WRITE) { 2842 /* 2843 * If this is a read->write transition, just ignore it and let 2844 * vm_fault() take care of it later. 2845 */ 2846 return; 2847 } 2848 2849 rw_wlock(&pvh_global_lock); 2850 PMAP_LOCK(pmap); 2851 2852 /* 2853 * OK, at this point, we know we're doing write-protect operation. 2854 * If the pmap is active, write-back the range. 2855 */ 2856 2857 flush = ((eva - sva) >= (PAGE_SIZE * 4)) ? 0 : -1; 2858 is_exec = is_refd = 0; 2859 2860 while (sva < eva) { 2861 next_bucket = L2_NEXT_BUCKET(sva); 2862 /* 2863 * Check for large page. 2864 */ 2865 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(sva)]; 2866 l1pd = *pl1pd; 2867 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { 2868 KASSERT(pmap != pmap_kernel(), 2869 ("pmap_protect: trying to modify " 2870 "kernel section protections")); 2871 /* 2872 * Are we protecting the entire large page? If not, 2873 * demote the mapping and fall through. 2874 */ 2875 if (sva + L1_S_SIZE == next_bucket && 2876 eva >= next_bucket) { 2877 l1pd &= ~(L1_S_PROT_MASK | L1_S_XN); 2878 if (!(prot & VM_PROT_EXECUTE)) 2879 l1pd |= L1_S_XN; 2880 /* 2881 * At this point we are always setting 2882 * write-protect bit. 2883 */ 2884 l1pd |= L1_S_APX; 2885 /* All managed superpages are user pages. */ 2886 l1pd |= L1_S_PROT_U; 2887 *pl1pd = l1pd; 2888 PTE_SYNC(pl1pd); 2889 pvh = pa_to_pvh(l1pd & L1_S_FRAME); 2890 pve = pmap_find_pv(pvh, pmap, 2891 trunc_1mpage(sva)); 2892 pve->pv_flags &= ~PVF_WRITE; 2893 sva = next_bucket; 2894 continue; 2895 } else if (!pmap_demote_section(pmap, sva)) { 2896 /* The large page mapping was destroyed. */ 2897 sva = next_bucket; 2898 continue; 2899 } 2900 } 2901 if (next_bucket > eva) 2902 next_bucket = eva; 2903 l2b = pmap_get_l2_bucket(pmap, sva); 2904 if (l2b == NULL) { 2905 sva = next_bucket; 2906 continue; 2907 } 2908 2909 ptep = &l2b->l2b_kva[l2pte_index(sva)]; 2910 2911 while (sva < next_bucket) { 2912 if ((pte = *ptep) != 0 && L2_S_WRITABLE(pte)) { 2913 struct vm_page *m; 2914 2915 m = PHYS_TO_VM_PAGE(l2pte_pa(pte)); 2916 pmap_set_prot(ptep, prot, 2917 !(pmap == pmap_kernel())); 2918 PTE_SYNC(ptep); 2919 2920 pmap_modify_pv(m, pmap, sva, PVF_WRITE, 0); 2921 2922 if (flush >= 0) { 2923 flush++; 2924 is_exec |= PTE_BEEN_EXECD(pte); 2925 is_refd |= PTE_BEEN_REFD(pte); 2926 } else { 2927 if (PTE_BEEN_EXECD(pte)) 2928 cpu_tlb_flushID_SE(sva); 2929 else if (PTE_BEEN_REFD(pte)) 2930 cpu_tlb_flushD_SE(sva); 2931 } 2932 } 2933 2934 sva += PAGE_SIZE; 2935 ptep++; 2936 } 2937 } 2938 2939 2940 if (flush) { 2941 if (is_exec) 2942 cpu_tlb_flushID(); 2943 else 2944 if (is_refd) 2945 cpu_tlb_flushD(); 2946 cpu_cpwait(); 2947 } 2948 rw_wunlock(&pvh_global_lock); 2949 2950 PMAP_UNLOCK(pmap); 2951 } 2952 2953 2954 /* 2955 * Insert the given physical page (p) at 2956 * the specified virtual address (v) in the 2957 * target physical map with the protection requested. 2958 * 2959 * If specified, the page will be wired down, meaning 2960 * that the related pte can not be reclaimed. 2961 * 2962 * NB: This is the only routine which MAY NOT lazy-evaluate 2963 * or lose information. 
That is, this routine must actually 2964 * insert this page into the given map NOW. 2965 */ 2966 2967 int 2968 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 2969 u_int flags, int8_t psind __unused) 2970 { 2971 struct l2_bucket *l2b; 2972 int rv; 2973 2974 rw_wlock(&pvh_global_lock); 2975 PMAP_LOCK(pmap); 2976 rv = pmap_enter_locked(pmap, va, m, prot, flags); 2977 if (rv == KERN_SUCCESS) { 2978 /* 2979 * If both the l2b_occupancy and the reservation are fully 2980 * populated, then attempt promotion. 2981 */ 2982 l2b = pmap_get_l2_bucket(pmap, va); 2983 if (l2b != NULL && l2b->l2b_occupancy == L2_PTE_NUM_TOTAL && 2984 sp_enabled && (m->flags & PG_FICTITIOUS) == 0 && 2985 vm_reserv_level_iffullpop(m) == 0) 2986 pmap_promote_section(pmap, va); 2987 } 2988 PMAP_UNLOCK(pmap); 2989 rw_wunlock(&pvh_global_lock); 2990 return (rv); 2991 } 2992 2993 /* 2994 * The pvh global and pmap locks must be held. 2995 */ 2996 static int 2997 pmap_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 2998 u_int flags) 2999 { 3000 struct l2_bucket *l2b = NULL; 3001 struct vm_page *om; 3002 struct pv_entry *pve = NULL; 3003 pd_entry_t *pl1pd, l1pd; 3004 pt_entry_t *ptep, npte, opte; 3005 u_int nflags; 3006 u_int is_exec, is_refd; 3007 vm_paddr_t pa; 3008 u_char user; 3009 3010 PMAP_ASSERT_LOCKED(pmap); 3011 rw_assert(&pvh_global_lock, RA_WLOCKED); 3012 if (va == vector_page) { 3013 pa = systempage.pv_pa; 3014 m = NULL; 3015 } else { 3016 if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) 3017 VM_OBJECT_ASSERT_LOCKED(m->object); 3018 pa = VM_PAGE_TO_PHYS(m); 3019 } 3020 3021 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; 3022 if ((va < VM_MAXUSER_ADDRESS) && 3023 (*pl1pd & L1_TYPE_MASK) == L1_S_PROTO) { 3024 (void)pmap_demote_section(pmap, va); 3025 } 3026 3027 user = 0; 3028 /* 3029 * Make sure userland mappings get the right permissions 3030 */ 3031 if (pmap != pmap_kernel() && va != vector_page) 3032 user = 1; 3033 3034 nflags = 0; 3035 3036 if (prot & VM_PROT_WRITE) 3037 nflags |= PVF_WRITE; 3038 if ((flags & PMAP_ENTER_WIRED) != 0) 3039 nflags |= PVF_WIRED; 3040 3041 PDEBUG(1, printf("pmap_enter: pmap = %08x, va = %08x, m = %08x, " 3042 "prot = %x, flags = %x\n", (uint32_t) pmap, va, (uint32_t) m, 3043 prot, flags)); 3044 3045 if (pmap == pmap_kernel()) { 3046 l2b = pmap_get_l2_bucket(pmap, va); 3047 if (l2b == NULL) 3048 l2b = pmap_grow_l2_bucket(pmap, va); 3049 } else { 3050 do_l2b_alloc: 3051 l2b = pmap_alloc_l2_bucket(pmap, va); 3052 if (l2b == NULL) { 3053 if ((flags & PMAP_ENTER_NOSLEEP) == 0) { 3054 PMAP_UNLOCK(pmap); 3055 rw_wunlock(&pvh_global_lock); 3056 VM_WAIT; 3057 rw_wlock(&pvh_global_lock); 3058 PMAP_LOCK(pmap); 3059 goto do_l2b_alloc; 3060 } 3061 return (KERN_RESOURCE_SHORTAGE); 3062 } 3063 } 3064 3065 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; 3066 if ((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO) 3067 panic("pmap_enter: attempt to enter on 1MB page, va: %#x", va); 3068 3069 ptep = &l2b->l2b_kva[l2pte_index(va)]; 3070 3071 opte = *ptep; 3072 npte = pa; 3073 is_exec = is_refd = 0; 3074 3075 if (opte) { 3076 if (l2pte_pa(opte) == pa) { 3077 /* 3078 * We're changing the attrs of an existing mapping. 3079 */ 3080 if (m != NULL) 3081 pmap_modify_pv(m, pmap, va, 3082 PVF_WRITE | PVF_WIRED, nflags); 3083 is_exec |= PTE_BEEN_EXECD(opte); 3084 is_refd |= PTE_BEEN_REFD(opte); 3085 goto validate; 3086 } 3087 if ((om = PHYS_TO_VM_PAGE(l2pte_pa(opte)))) { 3088 /* 3089 * Replacing an existing mapping with a new one. 
3090 * It is part of our managed memory so we 3091 * must remove it from the PV list 3092 */ 3093 if ((pve = pmap_remove_pv(om, pmap, va))) { 3094 is_exec |= PTE_BEEN_EXECD(opte); 3095 is_refd |= PTE_BEEN_REFD(opte); 3096 3097 if (m && ((m->oflags & VPO_UNMANAGED))) 3098 pmap_free_pv_entry(pmap, pve); 3099 } 3100 } 3101 3102 } else { 3103 /* 3104 * Keep the stats up to date 3105 */ 3106 l2b->l2b_occupancy++; 3107 pmap->pm_stats.resident_count++; 3108 } 3109 3110 /* 3111 * Enter on the PV list if part of our managed memory. 3112 */ 3113 if ((m && !(m->oflags & VPO_UNMANAGED))) { 3114 if ((!pve) && (pve = pmap_get_pv_entry(pmap, FALSE)) == NULL) 3115 panic("pmap_enter: no pv entries"); 3116 3117 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, 3118 ("pmap_enter: managed mapping within the clean submap")); 3119 KASSERT(pve != NULL, ("No pv")); 3120 pmap_enter_pv(m, pve, pmap, va, nflags); 3121 } 3122 3123 validate: 3124 /* Make the new PTE valid */ 3125 npte |= L2_S_PROTO; 3126 #ifdef SMP 3127 npte |= L2_SHARED; 3128 #endif 3129 /* Set defaults first - kernel read access */ 3130 npte |= L2_APX; 3131 npte |= L2_S_PROT_R; 3132 /* Set "referenced" flag */ 3133 npte |= L2_S_REF; 3134 3135 /* Now tune APs as desired */ 3136 if (user) 3137 npte |= L2_S_PROT_U; 3138 /* 3139 * If this is not a vector_page 3140 * then continue setting mapping parameters 3141 */ 3142 if (m != NULL) { 3143 if ((m->oflags & VPO_UNMANAGED) == 0) { 3144 if (prot & (VM_PROT_ALL)) { 3145 vm_page_aflag_set(m, PGA_REFERENCED); 3146 } else { 3147 /* 3148 * Need to do page referenced emulation. 3149 */ 3150 npte &= ~L2_S_REF; 3151 } 3152 } 3153 3154 if (prot & VM_PROT_WRITE) { 3155 if ((m->oflags & VPO_UNMANAGED) == 0) { 3156 vm_page_aflag_set(m, PGA_WRITEABLE); 3157 /* 3158 * XXX: Skip modified bit emulation for now. 3159 * The emulation reveals problems 3160 * that result in random failures 3161 * during memory allocation on some 3162 * platforms. 3163 * Therefore, the page is marked RW 3164 * immediately. 3165 */ 3166 npte &= ~(L2_APX); 3167 vm_page_dirty(m); 3168 } else 3169 npte &= ~(L2_APX); 3170 } 3171 if (!(prot & VM_PROT_EXECUTE)) 3172 npte |= L2_XN; 3173 3174 if (m->md.pv_memattr != VM_MEMATTR_UNCACHEABLE) 3175 npte |= pte_l2_s_cache_mode; 3176 } 3177 3178 CTR5(KTR_PMAP,"enter: pmap:%p va:%x prot:%x pte:%x->%x", 3179 pmap, va, prot, opte, npte); 3180 /* 3181 * If this is just a wiring change, the two PTEs will be 3182 * identical, so there's no need to update the page table. 3183 */ 3184 if (npte != opte) { 3185 boolean_t is_cached = pmap_is_current(pmap); 3186 3187 *ptep = npte; 3188 PTE_SYNC(ptep); 3189 if (is_cached) { 3190 /* 3191 * We only need to frob the cache/tlb if this pmap 3192 * is current 3193 */ 3194 if (L1_IDX(va) != L1_IDX(vector_page) && 3195 l2pte_valid(npte)) { 3196 /* 3197 * This mapping is likely to be accessed as 3198 * soon as we return to userland. Fix up the 3199 * L1 entry to avoid taking another 3200 * page/domain fault. 3201 */ 3202 l1pd = l2b->l2b_phys | 3203 L1_C_DOM(pmap->pm_domain) | L1_C_PROTO; 3204 if (*pl1pd != l1pd) { 3205 *pl1pd = l1pd; 3206 PTE_SYNC(pl1pd); 3207 } 3208 } 3209 } 3210 3211 if (is_exec) 3212 cpu_tlb_flushID_SE(va); 3213 else if (is_refd) 3214 cpu_tlb_flushD_SE(va); 3215 cpu_cpwait(); 3216 } 3217 3218 if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap)) 3219 cpu_icache_sync_range(va, PAGE_SIZE); 3220 return (KERN_SUCCESS); 3221 } 3222 3223 /* 3224 * Maps a sequence of resident pages belonging to the same object. 
3225 * The sequence begins with the given page m_start. This page is 3226 * mapped at the given virtual address start. Each subsequent page is 3227 * mapped at a virtual address that is offset from start by the same 3228 * amount as the page is offset from m_start within the object. The 3229 * last page in the sequence is the page with the largest offset from 3230 * m_start that can be mapped at a virtual address less than the given 3231 * virtual address end. Not every virtual page between start and end 3232 * is mapped; only those for which a resident page exists with the 3233 * corresponding offset from m_start are mapped. 3234 */ 3235 void 3236 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 3237 vm_page_t m_start, vm_prot_t prot) 3238 { 3239 vm_offset_t va; 3240 vm_page_t m; 3241 vm_pindex_t diff, psize; 3242 3243 VM_OBJECT_ASSERT_LOCKED(m_start->object); 3244 3245 psize = atop(end - start); 3246 m = m_start; 3247 prot &= VM_PROT_READ | VM_PROT_EXECUTE; 3248 rw_wlock(&pvh_global_lock); 3249 PMAP_LOCK(pmap); 3250 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 3251 va = start + ptoa(diff); 3252 if ((va & L1_S_OFFSET) == 0 && L2_NEXT_BUCKET(va) <= end && 3253 m->psind == 1 && sp_enabled && 3254 pmap_enter_section(pmap, va, m, prot)) 3255 m = &m[L1_S_SIZE / PAGE_SIZE - 1]; 3256 else 3257 pmap_enter_locked(pmap, va, m, prot, 3258 PMAP_ENTER_NOSLEEP); 3259 m = TAILQ_NEXT(m, listq); 3260 } 3261 PMAP_UNLOCK(pmap); 3262 rw_wunlock(&pvh_global_lock); 3263 } 3264 3265 /* 3266 * this code makes some *MAJOR* assumptions: 3267 * 1. Current pmap & pmap exists. 3268 * 2. Not wired. 3269 * 3. Read access. 3270 * 4. No page table pages. 3271 * but is *MUCH* faster than pmap_enter... 3272 */ 3273 3274 void 3275 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 3276 { 3277 3278 prot &= VM_PROT_READ | VM_PROT_EXECUTE; 3279 rw_wlock(&pvh_global_lock); 3280 PMAP_LOCK(pmap); 3281 pmap_enter_locked(pmap, va, m, prot, PMAP_ENTER_NOSLEEP); 3282 PMAP_UNLOCK(pmap); 3283 rw_wunlock(&pvh_global_lock); 3284 } 3285 3286 /* 3287 * Clear the wired attribute from the mappings for the specified range of 3288 * addresses in the given pmap. Every valid mapping within that range 3289 * must have the wired attribute set. In contrast, invalid mappings 3290 * cannot have the wired attribute set, so they are ignored. 3291 * 3292 * XXX Wired mappings of unmanaged pages cannot be counted by this pmap 3293 * implementation. 3294 */ 3295 void 3296 pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 3297 { 3298 struct l2_bucket *l2b; 3299 struct md_page *pvh; 3300 pd_entry_t l1pd; 3301 pt_entry_t *ptep, pte; 3302 pv_entry_t pv; 3303 vm_offset_t next_bucket; 3304 vm_paddr_t pa; 3305 vm_page_t m; 3306 3307 rw_wlock(&pvh_global_lock); 3308 PMAP_LOCK(pmap); 3309 while (sva < eva) { 3310 next_bucket = L2_NEXT_BUCKET(sva); 3311 l1pd = pmap->pm_l1->l1_kva[L1_IDX(sva)]; 3312 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { 3313 pa = l1pd & L1_S_FRAME; 3314 m = PHYS_TO_VM_PAGE(pa); 3315 KASSERT(m != NULL && (m->oflags & VPO_UNMANAGED) == 0, 3316 ("pmap_unwire: unmanaged 1mpage %p", m)); 3317 pvh = pa_to_pvh(pa); 3318 pv = pmap_find_pv(pvh, pmap, trunc_1mpage(sva)); 3319 if ((pv->pv_flags & PVF_WIRED) == 0) 3320 panic("pmap_unwire: pv %p isn't wired", pv); 3321 3322 /* 3323 * Are we unwiring the entire large page? If not, 3324 * demote the mapping and fall through. 
3325 */ 3326 if (sva + L1_S_SIZE == next_bucket && 3327 eva >= next_bucket) { 3328 pv->pv_flags &= ~PVF_WIRED; 3329 pmap->pm_stats.wired_count -= L2_PTE_NUM_TOTAL; 3330 sva = next_bucket; 3331 continue; 3332 } else if (!pmap_demote_section(pmap, sva)) 3333 panic("pmap_unwire: demotion failed"); 3334 } 3335 if (next_bucket > eva) 3336 next_bucket = eva; 3337 l2b = pmap_get_l2_bucket(pmap, sva); 3338 if (l2b == NULL) { 3339 sva = next_bucket; 3340 continue; 3341 } 3342 for (ptep = &l2b->l2b_kva[l2pte_index(sva)]; sva < next_bucket; 3343 sva += PAGE_SIZE, ptep++) { 3344 if ((pte = *ptep) == 0 || 3345 (m = PHYS_TO_VM_PAGE(l2pte_pa(pte))) == NULL || 3346 (m->oflags & VPO_UNMANAGED) != 0) 3347 continue; 3348 pv = pmap_find_pv(&m->md, pmap, sva); 3349 if ((pv->pv_flags & PVF_WIRED) == 0) 3350 panic("pmap_unwire: pv %p isn't wired", pv); 3351 pv->pv_flags &= ~PVF_WIRED; 3352 pmap->pm_stats.wired_count--; 3353 } 3354 } 3355 rw_wunlock(&pvh_global_lock); 3356 PMAP_UNLOCK(pmap); 3357 } 3358 3359 3360 /* 3361 * Copy the range specified by src_addr/len 3362 * from the source map to the range dst_addr/len 3363 * in the destination map. 3364 * 3365 * This routine is only advisory and need not do anything. 3366 */ 3367 void 3368 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, 3369 vm_size_t len, vm_offset_t src_addr) 3370 { 3371 } 3372 3373 3374 /* 3375 * Routine: pmap_extract 3376 * Function: 3377 * Extract the physical page address associated 3378 * with the given map/virtual_address pair. 3379 */ 3380 vm_paddr_t 3381 pmap_extract(pmap_t pmap, vm_offset_t va) 3382 { 3383 vm_paddr_t pa; 3384 3385 PMAP_LOCK(pmap); 3386 pa = pmap_extract_locked(pmap, va); 3387 PMAP_UNLOCK(pmap); 3388 return (pa); 3389 } 3390 3391 static vm_paddr_t 3392 pmap_extract_locked(pmap_t pmap, vm_offset_t va) 3393 { 3394 struct l2_dtable *l2; 3395 pd_entry_t l1pd; 3396 pt_entry_t *ptep, pte; 3397 vm_paddr_t pa; 3398 u_int l1idx; 3399 3400 if (kernel_vm_end != 0 && pmap != kernel_pmap) 3401 PMAP_ASSERT_LOCKED(pmap); 3402 l1idx = L1_IDX(va); 3403 l1pd = pmap->pm_l1->l1_kva[l1idx]; 3404 if (l1pte_section_p(l1pd)) { 3405 /* XXX: what to do about the bits > 32 ? */ 3406 if (l1pd & L1_S_SUPERSEC) 3407 pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET); 3408 else 3409 pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET); 3410 } else { 3411 /* 3412 * Note that we can't rely on the validity of the L1 3413 * descriptor as an indication that a mapping exists. 3414 * We have to look it up in the L2 dtable. 3415 */ 3416 l2 = pmap->pm_l2[L2_IDX(l1idx)]; 3417 if (l2 == NULL || 3418 (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) 3419 return (0); 3420 pte = ptep[l2pte_index(va)]; 3421 if (pte == 0) 3422 return (0); 3423 switch (pte & L2_TYPE_MASK) { 3424 case L2_TYPE_L: 3425 pa = (pte & L2_L_FRAME) | (va & L2_L_OFFSET); 3426 break; 3427 default: 3428 pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET); 3429 break; 3430 } 3431 } 3432 return (pa); 3433 } 3434 3435 /* 3436 * Atomically extract and hold the physical page with the given 3437 * pmap and virtual address pair if that mapping permits the given 3438 * protection. 
3439 * 3440 */ 3441 vm_page_t 3442 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) 3443 { 3444 struct l2_dtable *l2; 3445 pd_entry_t l1pd; 3446 pt_entry_t *ptep, pte; 3447 vm_paddr_t pa, paddr; 3448 vm_page_t m = NULL; 3449 u_int l1idx; 3450 l1idx = L1_IDX(va); 3451 paddr = 0; 3452 3453 PMAP_LOCK(pmap); 3454 retry: 3455 l1pd = pmap->pm_l1->l1_kva[l1idx]; 3456 if (l1pte_section_p(l1pd)) { 3457 /* XXX: what to do about the bits > 32 ? */ 3458 if (l1pd & L1_S_SUPERSEC) 3459 pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET); 3460 else 3461 pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET); 3462 if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr)) 3463 goto retry; 3464 if (L1_S_WRITABLE(l1pd) || (prot & VM_PROT_WRITE) == 0) { 3465 m = PHYS_TO_VM_PAGE(pa); 3466 vm_page_hold(m); 3467 } 3468 } else { 3469 /* 3470 * Note that we can't rely on the validity of the L1 3471 * descriptor as an indication that a mapping exists. 3472 * We have to look it up in the L2 dtable. 3473 */ 3474 l2 = pmap->pm_l2[L2_IDX(l1idx)]; 3475 3476 if (l2 == NULL || 3477 (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) { 3478 PMAP_UNLOCK(pmap); 3479 return (NULL); 3480 } 3481 3482 ptep = &ptep[l2pte_index(va)]; 3483 pte = *ptep; 3484 3485 if (pte == 0) { 3486 PMAP_UNLOCK(pmap); 3487 return (NULL); 3488 } else if ((prot & VM_PROT_WRITE) && (pte & L2_APX)) { 3489 PMAP_UNLOCK(pmap); 3490 return (NULL); 3491 } else { 3492 switch (pte & L2_TYPE_MASK) { 3493 case L2_TYPE_L: 3494 panic("extract and hold section mapping"); 3495 break; 3496 default: 3497 pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET); 3498 break; 3499 } 3500 if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr)) 3501 goto retry; 3502 m = PHYS_TO_VM_PAGE(pa); 3503 vm_page_hold(m); 3504 } 3505 3506 } 3507 3508 PMAP_UNLOCK(pmap); 3509 PA_UNLOCK_COND(paddr); 3510 return (m); 3511 } 3512 3513 /* 3514 * Initialize a preallocated and zeroed pmap structure, 3515 * such as one in a vmspace structure. 3516 */ 3517 3518 int 3519 pmap_pinit(pmap_t pmap) 3520 { 3521 PDEBUG(1, printf("pmap_pinit: pmap = %08x\n", (uint32_t) pmap)); 3522 3523 pmap_alloc_l1(pmap); 3524 bzero(pmap->pm_l2, sizeof(pmap->pm_l2)); 3525 3526 CPU_ZERO(&pmap->pm_active); 3527 3528 TAILQ_INIT(&pmap->pm_pvchunk); 3529 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 3530 pmap->pm_stats.resident_count = 1; 3531 if (vector_page < KERNBASE) { 3532 pmap_enter(pmap, vector_page, 3533 PHYS_TO_VM_PAGE(systempage.pv_pa), VM_PROT_READ, 3534 PMAP_ENTER_WIRED, 0); 3535 } 3536 return (1); 3537 } 3538 3539 3540 /*************************************************** 3541 * Superpage management routines. 
3542 ***************************************************/ 3543 3544 static PMAP_INLINE struct pv_entry * 3545 pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 3546 { 3547 pv_entry_t pv; 3548 3549 rw_assert(&pvh_global_lock, RA_WLOCKED); 3550 3551 pv = pmap_find_pv(pvh, pmap, va); 3552 if (pv != NULL) 3553 TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); 3554 3555 return (pv); 3556 } 3557 3558 static void 3559 pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 3560 { 3561 pv_entry_t pv; 3562 3563 pv = pmap_pvh_remove(pvh, pmap, va); 3564 KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); 3565 pmap_free_pv_entry(pmap, pv); 3566 } 3567 3568 static boolean_t 3569 pmap_pv_insert_section(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) 3570 { 3571 struct md_page *pvh; 3572 pv_entry_t pv; 3573 3574 rw_assert(&pvh_global_lock, RA_WLOCKED); 3575 if (pv_entry_count < pv_entry_high_water && 3576 (pv = pmap_get_pv_entry(pmap, TRUE)) != NULL) { 3577 pv->pv_va = va; 3578 pvh = pa_to_pvh(pa); 3579 TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); 3580 return (TRUE); 3581 } else 3582 return (FALSE); 3583 } 3584 3585 /* 3586 * Create the pv entries for each of the pages within a superpage. 3587 */ 3588 static void 3589 pmap_pv_demote_section(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) 3590 { 3591 struct md_page *pvh; 3592 pv_entry_t pve, pv; 3593 vm_offset_t va_last; 3594 vm_page_t m; 3595 3596 rw_assert(&pvh_global_lock, RA_WLOCKED); 3597 KASSERT((pa & L1_S_OFFSET) == 0, 3598 ("pmap_pv_demote_section: pa is not 1mpage aligned")); 3599 3600 /* 3601 * Transfer the 1mpage's pv entry for this mapping to the first 3602 * page's pv list. 3603 */ 3604 pvh = pa_to_pvh(pa); 3605 va = trunc_1mpage(va); 3606 pv = pmap_pvh_remove(pvh, pmap, va); 3607 KASSERT(pv != NULL, ("pmap_pv_demote_section: pv not found")); 3608 m = PHYS_TO_VM_PAGE(pa); 3609 TAILQ_INSERT_HEAD(&m->md.pv_list, pv, pv_list); 3610 /* Instantiate the remaining pv entries. */ 3611 va_last = L2_NEXT_BUCKET(va) - PAGE_SIZE; 3612 do { 3613 m++; 3614 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3615 ("pmap_pv_demote_section: page %p is not managed", m)); 3616 va += PAGE_SIZE; 3617 pve = pmap_get_pv_entry(pmap, FALSE); 3618 pmap_enter_pv(m, pve, pmap, va, pv->pv_flags); 3619 } while (va < va_last); 3620 } 3621 3622 static void 3623 pmap_pv_promote_section(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) 3624 { 3625 struct md_page *pvh; 3626 pv_entry_t pv; 3627 vm_offset_t va_last; 3628 vm_page_t m; 3629 3630 rw_assert(&pvh_global_lock, RA_WLOCKED); 3631 KASSERT((pa & L1_S_OFFSET) == 0, 3632 ("pmap_pv_promote_section: pa is not 1mpage aligned")); 3633 3634 /* 3635 * Transfer the first page's pv entry for this mapping to the 3636 * 1mpage's pv list. Aside from avoiding the cost of a call 3637 * to get_pv_entry(), a transfer avoids the possibility that 3638 * get_pv_entry() calls pmap_pv_reclaim() and that pmap_pv_reclaim() 3639 * removes one of the mappings that is being promoted. 3640 */ 3641 m = PHYS_TO_VM_PAGE(pa); 3642 va = trunc_1mpage(va); 3643 pv = pmap_pvh_remove(&m->md, pmap, va); 3644 KASSERT(pv != NULL, ("pmap_pv_promote_section: pv not found")); 3645 pvh = pa_to_pvh(pa); 3646 TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); 3647 /* Free the remaining pv entries in the newly mapped section pages */ 3648 va_last = L2_NEXT_BUCKET(va) - PAGE_SIZE; 3649 do { 3650 m++; 3651 va += PAGE_SIZE; 3652 /* 3653 * Don't care the flags, first pv contains sufficient 3654 * information for all of the pages so nothing is really lost. 
		 */
		pmap_pvh_free(&m->md, pmap, va);
	} while (va < va_last);
}

/*
 * Tries to create a 1MB page mapping. Returns TRUE if successful and
 * FALSE otherwise. Fails if (1) page is unmanaged, kernel pmap or vectors
 * page, (2) a mapping already exists at the specified virtual address, or
 * (3) a pv entry cannot be allocated without reclaiming another pv entry.
 */
static boolean_t
pmap_enter_section(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{
	pd_entry_t *pl1pd;
	vm_offset_t pa;
	struct l2_bucket *l2b;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	PMAP_ASSERT_LOCKED(pmap);

	/* Skip kernel, vectors page and unmanaged mappings */
	if ((pmap == pmap_kernel()) || (L1_IDX(va) == L1_IDX(vector_page)) ||
	    ((m->oflags & VPO_UNMANAGED) != 0)) {
		CTR2(KTR_PMAP, "pmap_enter_section: failure for va %#lx"
		    " in pmap %p", va, pmap);
		return (FALSE);
	}
	/*
	 * Check whether this is a valid section superpage entry or
	 * there is a l2_bucket associated with that L1 page directory.
	 */
	va = trunc_1mpage(va);
	pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)];
	l2b = pmap_get_l2_bucket(pmap, va);
	if ((*pl1pd & L1_S_PROTO) || (l2b != NULL)) {
		CTR2(KTR_PMAP, "pmap_enter_section: failure for va %#lx"
		    " in pmap %p", va, pmap);
		return (FALSE);
	}
	pa = VM_PAGE_TO_PHYS(m);
	/*
	 * Abort this mapping if its PV entry could not be created.
	 */
	if (!pmap_pv_insert_section(pmap, va, VM_PAGE_TO_PHYS(m))) {
		CTR2(KTR_PMAP, "pmap_enter_section: failure for va %#lx"
		    " in pmap %p", va, pmap);
		return (FALSE);
	}
	/*
	 * Increment counters.
	 */
	pmap->pm_stats.resident_count += L2_PTE_NUM_TOTAL;
	/*
	 * Despite permissions, mark the superpage read-only.
	 */
	prot &= ~VM_PROT_WRITE;
	/*
	 * Map the superpage.
	 */
	pmap_map_section(pmap, va, pa, prot, FALSE);

	pmap_section_mappings++;
	CTR2(KTR_PMAP, "pmap_enter_section: success for va %#lx"
	    " in pmap %p", va, pmap);
	return (TRUE);
}

/*
 * pmap_remove_section: do the things to unmap a superpage in a process
 */
static void
pmap_remove_section(pmap_t pmap, vm_offset_t sva)
{
	struct md_page *pvh;
	struct l2_bucket *l2b;
	pd_entry_t *pl1pd, l1pd;
	vm_offset_t eva, va;
	vm_page_t m;

	PMAP_ASSERT_LOCKED(pmap);
	if ((pmap == pmap_kernel()) || (L1_IDX(sva) == L1_IDX(vector_page)))
		return;

	KASSERT((sva & L1_S_OFFSET) == 0,
	    ("pmap_remove_section: sva is not 1mpage aligned"));

	pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(sva)];
	l1pd = *pl1pd;

	m = PHYS_TO_VM_PAGE(l1pd & L1_S_FRAME);
	KASSERT((m != NULL && ((m->oflags & VPO_UNMANAGED) == 0)),
	    ("pmap_remove_section: no corresponding vm_page or "
	    "page unmanaged"));

	pmap->pm_stats.resident_count -= L2_PTE_NUM_TOTAL;
	pvh = pa_to_pvh(l1pd & L1_S_FRAME);
	pmap_pvh_free(pvh, pmap, sva);
	eva = L2_NEXT_BUCKET(sva);
	for (va = sva, m = PHYS_TO_VM_PAGE(l1pd & L1_S_FRAME);
	    va < eva; va += PAGE_SIZE, m++) {
		/*
		 * Mark base pages referenced but skip marking them dirty.
		 * If the superpage was writeable, all base pages were
		 * already marked as dirty in pmap_fault_fixup() before
		 * promotion. The reference bit, however, might not have
		 * been set for each base page when the superpage was
		 * created at once, not as a result of promotion.
		 */
		if (L1_S_REFERENCED(l1pd))
			vm_page_aflag_set(m, PGA_REFERENCED);
		if (TAILQ_EMPTY(&m->md.pv_list) &&
		    TAILQ_EMPTY(&pvh->pv_list))
			vm_page_aflag_clear(m, PGA_WRITEABLE);
	}

	l2b = pmap_get_l2_bucket(pmap, sva);
	if (l2b != NULL) {
		KASSERT(l2b->l2b_occupancy == L2_PTE_NUM_TOTAL,
		    ("pmap_remove_section: l2_bucket occupancy error"));
		pmap_free_l2_bucket(pmap, l2b, L2_PTE_NUM_TOTAL);
	}
	/* Now invalidate L1 slot */
	*pl1pd = 0;
	PTE_SYNC(pl1pd);
	if (L1_S_EXECUTABLE(l1pd))
		cpu_tlb_flushID_SE(sva);
	else
		cpu_tlb_flushD_SE(sva);
	cpu_cpwait();
}

/*
 * Tries to promote the 256, contiguous 4KB page mappings that are
 * within a single l2_bucket to a single 1MB section mapping.
 * For promotion to occur, two conditions must be met: (1) the 4KB page
 * mappings must map aligned, contiguous physical memory and (2) the 4KB page
 * mappings must have identical characteristics.
 */
static void
pmap_promote_section(pmap_t pmap, vm_offset_t va)
{
	pt_entry_t *firstptep, firstpte, oldpte, pa, *pte;
	vm_page_t m, oldm;
	vm_offset_t first_va, old_va;
	struct l2_bucket *l2b = NULL;
	vm_prot_t prot;
	struct pv_entry *pve, *first_pve;

	PMAP_ASSERT_LOCKED(pmap);

	prot = VM_PROT_ALL;
	/*
	 * Skip promoting kernel pages. This is justified by the following:
	 * 1. Kernel is already mapped using section mappings in each pmap
	 * 2. Managed mappings within the kernel are not to be promoted anyway
	 */
	if (pmap == pmap_kernel()) {
		pmap_section_p_failures++;
		CTR2(KTR_PMAP, "pmap_promote_section: failure for va %#x"
		    " in pmap %p", va, pmap);
		return;
	}
	/* Do not attempt to promote vectors pages */
	if (L1_IDX(va) == L1_IDX(vector_page)) {
		pmap_section_p_failures++;
		CTR2(KTR_PMAP, "pmap_promote_section: failure for va %#x"
		    " in pmap %p", va, pmap);
		return;
	}
	/*
	 * Examine the first PTE in the specified l2_bucket. Abort if this PTE
	 * is either invalid, unused, or does not map the first 4KB physical
	 * page within 1MB page.
	 */
	first_va = trunc_1mpage(va);
	l2b = pmap_get_l2_bucket(pmap, first_va);
	KASSERT(l2b != NULL, ("pmap_promote_section: trying to promote "
	    "not existing l2 bucket"));
	firstptep = &l2b->l2b_kva[0];

	firstpte = *firstptep;
	if ((l2pte_pa(firstpte) & L1_S_OFFSET) != 0) {
		pmap_section_p_failures++;
		CTR2(KTR_PMAP, "pmap_promote_section: failure for va %#x"
		    " in pmap %p", va, pmap);
		return;
	}

	if ((firstpte & (L2_S_PROTO | L2_S_REF)) != (L2_S_PROTO | L2_S_REF)) {
		pmap_section_p_failures++;
		CTR2(KTR_PMAP, "pmap_promote_section: failure for va %#x"
		    " in pmap %p", va, pmap);
		return;
	}
	/*
	 * ARM uses pv_entry to mark particular mapping WIRED so don't promote
	 * unmanaged pages since it is impossible to determine whether the
	 * page is wired or not if there is no corresponding pv_entry.
3854 */ 3855 m = PHYS_TO_VM_PAGE(l2pte_pa(firstpte)); 3856 if (m && ((m->oflags & VPO_UNMANAGED) != 0)) { 3857 pmap_section_p_failures++; 3858 CTR2(KTR_PMAP, "pmap_promote_section: failure for va %#x" 3859 " in pmap %p", va, pmap); 3860 return; 3861 } 3862 first_pve = pmap_find_pv(&m->md, pmap, first_va); 3863 /* 3864 * PTE is modified only on write due to modified bit 3865 * emulation. If the entry is referenced and writable 3866 * then it is modified and we don't clear write enable. 3867 * Otherwise, writing is disabled in PTE anyway and 3868 * we just configure protections for the section mapping 3869 * that is going to be created. 3870 */ 3871 if ((first_pve->pv_flags & PVF_WRITE) != 0) { 3872 if (!L2_S_WRITABLE(firstpte)) { 3873 first_pve->pv_flags &= ~PVF_WRITE; 3874 prot &= ~VM_PROT_WRITE; 3875 } 3876 } else 3877 prot &= ~VM_PROT_WRITE; 3878 3879 if (!L2_S_EXECUTABLE(firstpte)) 3880 prot &= ~VM_PROT_EXECUTE; 3881 3882 /* 3883 * Examine each of the other PTEs in the specified l2_bucket. 3884 * Abort if this PTE maps an unexpected 4KB physical page or 3885 * does not have identical characteristics to the first PTE. 3886 */ 3887 pa = l2pte_pa(firstpte) + ((L2_PTE_NUM_TOTAL - 1) * PAGE_SIZE); 3888 old_va = L2_NEXT_BUCKET(first_va) - PAGE_SIZE; 3889 3890 for (pte = (firstptep + L2_PTE_NUM_TOTAL - 1); pte > firstptep; pte--) { 3891 oldpte = *pte; 3892 if (l2pte_pa(oldpte) != pa) { 3893 pmap_section_p_failures++; 3894 CTR2(KTR_PMAP, "pmap_promote_section: failure for " 3895 "va %#x in pmap %p", va, pmap); 3896 return; 3897 } 3898 if ((oldpte & L2_S_PROMOTE) != (firstpte & L2_S_PROMOTE)) { 3899 pmap_section_p_failures++; 3900 CTR2(KTR_PMAP, "pmap_promote_section: failure for " 3901 "va %#x in pmap %p", va, pmap); 3902 return; 3903 } 3904 oldm = PHYS_TO_VM_PAGE(l2pte_pa(oldpte)); 3905 if (oldm && ((oldm->oflags & VPO_UNMANAGED) != 0)) { 3906 pmap_section_p_failures++; 3907 CTR2(KTR_PMAP, "pmap_promote_section: failure for " 3908 "va %#x in pmap %p", va, pmap); 3909 return; 3910 } 3911 3912 pve = pmap_find_pv(&oldm->md, pmap, old_va); 3913 if (pve == NULL) { 3914 pmap_section_p_failures++; 3915 CTR2(KTR_PMAP, "pmap_promote_section: failure for " 3916 "va %#x old_va %x - no pve", va, old_va); 3917 return; 3918 } 3919 3920 if (!L2_S_WRITABLE(oldpte) && (pve->pv_flags & PVF_WRITE)) 3921 pve->pv_flags &= ~PVF_WRITE; 3922 if (pve->pv_flags != first_pve->pv_flags) { 3923 pmap_section_p_failures++; 3924 CTR2(KTR_PMAP, "pmap_promote_section: failure for " 3925 "va %#x in pmap %p", va, pmap); 3926 return; 3927 } 3928 3929 old_va -= PAGE_SIZE; 3930 pa -= PAGE_SIZE; 3931 } 3932 /* 3933 * Promote the pv entries. 3934 */ 3935 pmap_pv_promote_section(pmap, first_va, l2pte_pa(firstpte)); 3936 /* 3937 * Map the superpage. 3938 */ 3939 pmap_map_section(pmap, first_va, l2pte_pa(firstpte), prot, TRUE); 3940 /* 3941 * Invalidate all possible TLB mappings for small 3942 * pages within the newly created superpage. 3943 * Rely on the first PTE's attributes since they 3944 * have to be consistent across all of the base pages 3945 * within the superpage. If page is not executable it 3946 * is at least referenced. 3947 * The fastest way to do that is to invalidate whole 3948 * TLB at once instead of executing 256 CP15 TLB 3949 * invalidations by single entry. TLBs usually maintain 3950 * several dozen entries so loss of unrelated entries is 3951 * still a less agresive approach. 
3952 */ 3953 if (L2_S_EXECUTABLE(firstpte)) 3954 cpu_tlb_flushID(); 3955 else 3956 cpu_tlb_flushD(); 3957 cpu_cpwait(); 3958 3959 pmap_section_promotions++; 3960 CTR2(KTR_PMAP, "pmap_promote_section: success for va %#x" 3961 " in pmap %p", first_va, pmap); 3962 } 3963 3964 /* 3965 * Fills a l2_bucket with mappings to consecutive physical pages. 3966 */ 3967 static void 3968 pmap_fill_l2b(struct l2_bucket *l2b, pt_entry_t newpte) 3969 { 3970 pt_entry_t *ptep; 3971 int i; 3972 3973 for (i = 0; i < L2_PTE_NUM_TOTAL; i++) { 3974 ptep = &l2b->l2b_kva[i]; 3975 *ptep = newpte; 3976 PTE_SYNC(ptep); 3977 3978 newpte += PAGE_SIZE; 3979 } 3980 3981 l2b->l2b_occupancy = L2_PTE_NUM_TOTAL; 3982 } 3983 3984 /* 3985 * Tries to demote a 1MB section mapping. If demotion fails, the 3986 * 1MB section mapping is invalidated. 3987 */ 3988 static boolean_t 3989 pmap_demote_section(pmap_t pmap, vm_offset_t va) 3990 { 3991 struct l2_bucket *l2b; 3992 struct pv_entry *l1pdpve; 3993 struct md_page *pvh; 3994 pd_entry_t *pl1pd, l1pd, newl1pd; 3995 pt_entry_t *firstptep, newpte; 3996 vm_offset_t pa; 3997 vm_page_t m; 3998 3999 PMAP_ASSERT_LOCKED(pmap); 4000 /* 4001 * According to assumptions described in pmap_promote_section, 4002 * kernel is and always should be mapped using 1MB section mappings. 4003 * What more, managed kernel pages were not to be promoted. 4004 */ 4005 KASSERT(pmap != pmap_kernel() && L1_IDX(va) != L1_IDX(vector_page), 4006 ("pmap_demote_section: forbidden section mapping")); 4007 4008 va = trunc_1mpage(va); 4009 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; 4010 l1pd = *pl1pd; 4011 KASSERT((l1pd & L1_TYPE_MASK) == L1_S_PROTO, 4012 ("pmap_demote_section: not section or invalid section")); 4013 4014 pa = l1pd & L1_S_FRAME; 4015 m = PHYS_TO_VM_PAGE(pa); 4016 KASSERT((m != NULL && (m->oflags & VPO_UNMANAGED) == 0), 4017 ("pmap_demote_section: no vm_page for selected superpage or" 4018 "unmanaged")); 4019 4020 pvh = pa_to_pvh(pa); 4021 l1pdpve = pmap_find_pv(pvh, pmap, va); 4022 KASSERT(l1pdpve != NULL, ("pmap_demote_section: no pv entry for " 4023 "managed page")); 4024 4025 l2b = pmap_get_l2_bucket(pmap, va); 4026 if (l2b == NULL) { 4027 KASSERT((l1pdpve->pv_flags & PVF_WIRED) == 0, 4028 ("pmap_demote_section: No l2_bucket for wired mapping")); 4029 /* 4030 * Invalidate the 1MB section mapping and return 4031 * "failure" if the mapping was never accessed or the 4032 * allocation of the new l2_bucket fails. 4033 */ 4034 if (!L1_S_REFERENCED(l1pd) || 4035 (l2b = pmap_alloc_l2_bucket(pmap, va)) == NULL) { 4036 /* Unmap and invalidate superpage. */ 4037 pmap_remove_section(pmap, trunc_1mpage(va)); 4038 CTR2(KTR_PMAP, "pmap_demote_section: failure for " 4039 "va %#x in pmap %p", va, pmap); 4040 return (FALSE); 4041 } 4042 } 4043 4044 /* 4045 * Now we should have corresponding l2_bucket available. 4046 * Let's process it to recreate 256 PTEs for each base page 4047 * within superpage. 4048 */ 4049 newpte = pa | L1_S_DEMOTE(l1pd); 4050 if (m->md.pv_memattr != VM_MEMATTR_UNCACHEABLE) 4051 newpte |= pte_l2_s_cache_mode; 4052 4053 /* 4054 * If the l2_bucket is new, initialize it. 4055 */ 4056 if (l2b->l2b_occupancy == 0) 4057 pmap_fill_l2b(l2b, newpte); 4058 else { 4059 firstptep = &l2b->l2b_kva[0]; 4060 KASSERT(l2pte_pa(*firstptep) == (pa), 4061 ("pmap_demote_section: firstpte and newpte map different " 4062 "physical addresses")); 4063 /* 4064 * If the mapping has changed attributes, update the page table 4065 * entries. 
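 * Such a change is detected below by comparing the L2_S_PROMOTE bits
 * of the first PTE against L1_S_DEMOTE(l1pd); on a mismatch, all of
 * the bucket's PTEs are rewritten by pmap_fill_l2b().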
4066 */ 4067 if ((*firstptep & L2_S_PROMOTE) != (L1_S_DEMOTE(l1pd))) 4068 pmap_fill_l2b(l2b, newpte); 4069 } 4070 /* Demote PV entry */ 4071 pmap_pv_demote_section(pmap, va, pa); 4072 4073 /* Now fix-up L1 */ 4074 newl1pd = l2b->l2b_phys | L1_C_DOM(pmap->pm_domain) | L1_C_PROTO; 4075 *pl1pd = newl1pd; 4076 PTE_SYNC(pl1pd); 4077 /* Invalidate old TLB mapping */ 4078 if (L1_S_EXECUTABLE(l1pd)) 4079 cpu_tlb_flushID_SE(va); 4080 else if (L1_S_REFERENCED(l1pd)) 4081 cpu_tlb_flushD_SE(va); 4082 cpu_cpwait(); 4083 4084 pmap_section_demotions++; 4085 CTR2(KTR_PMAP, "pmap_demote_section: success for va %#x" 4086 " in pmap %p", va, pmap); 4087 return (TRUE); 4088 } 4089 4090 /*************************************************** 4091 * page management routines. 4092 ***************************************************/ 4093 4094 /* 4095 * We are in a serious low memory condition. Resort to 4096 * drastic measures to free some pages so we can allocate 4097 * another pv entry chunk. 4098 */ 4099 static vm_page_t 4100 pmap_pv_reclaim(pmap_t locked_pmap) 4101 { 4102 struct pch newtail; 4103 struct pv_chunk *pc; 4104 struct l2_bucket *l2b = NULL; 4105 pmap_t pmap; 4106 pd_entry_t *pl1pd; 4107 pt_entry_t *ptep; 4108 pv_entry_t pv; 4109 vm_offset_t va; 4110 vm_page_t free, m, m_pc; 4111 uint32_t inuse; 4112 int bit, field, freed, idx; 4113 4114 PMAP_ASSERT_LOCKED(locked_pmap); 4115 pmap = NULL; 4116 free = m_pc = NULL; 4117 TAILQ_INIT(&newtail); 4118 while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 || 4119 free == NULL)) { 4120 TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 4121 if (pmap != pc->pc_pmap) { 4122 if (pmap != NULL) { 4123 cpu_tlb_flushID(); 4124 cpu_cpwait(); 4125 if (pmap != locked_pmap) 4126 PMAP_UNLOCK(pmap); 4127 } 4128 pmap = pc->pc_pmap; 4129 /* Avoid deadlock and lock recursion. */ 4130 if (pmap > locked_pmap) 4131 PMAP_LOCK(pmap); 4132 else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) { 4133 pmap = NULL; 4134 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 4135 continue; 4136 } 4137 } 4138 4139 /* 4140 * Destroy every non-wired, 4 KB page mapping in the chunk. 4141 */ 4142 freed = 0; 4143 for (field = 0; field < _NPCM; field++) { 4144 for (inuse = ~pc->pc_map[field] & pc_freemask[field]; 4145 inuse != 0; inuse &= ~(1UL << bit)) { 4146 bit = ffs(inuse) - 1; 4147 idx = field * sizeof(inuse) * NBBY + bit; 4148 pv = &pc->pc_pventry[idx]; 4149 va = pv->pv_va; 4150 4151 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; 4152 if ((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO) 4153 continue; 4154 if (pv->pv_flags & PVF_WIRED) 4155 continue; 4156 4157 l2b = pmap_get_l2_bucket(pmap, va); 4158 KASSERT(l2b != NULL, ("No l2 bucket")); 4159 ptep = &l2b->l2b_kva[l2pte_index(va)]; 4160 m = PHYS_TO_VM_PAGE(l2pte_pa(*ptep)); 4161 KASSERT((vm_offset_t)m >= KERNBASE, 4162 ("Trying to access non-existent page " 4163 "va %x pte %x", va, *ptep)); 4164 *ptep = 0; 4165 PTE_SYNC(ptep); 4166 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 4167 if (TAILQ_EMPTY(&m->md.pv_list)) 4168 vm_page_aflag_clear(m, PGA_WRITEABLE); 4169 pc->pc_map[field] |= 1UL << bit; 4170 freed++; 4171 } 4172 } 4173 4174 if (freed == 0) { 4175 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 4176 continue; 4177 } 4178 /* Every freed mapping is for a 4 KB page. 
*/ 4179 pmap->pm_stats.resident_count -= freed; 4180 PV_STAT(pv_entry_frees += freed); 4181 PV_STAT(pv_entry_spare += freed); 4182 pv_entry_count -= freed; 4183 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 4184 for (field = 0; field < _NPCM; field++) 4185 if (pc->pc_map[field] != pc_freemask[field]) { 4186 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, 4187 pc_list); 4188 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 4189 4190 /* 4191 * One freed pv entry in locked_pmap is 4192 * sufficient. 4193 */ 4194 if (pmap == locked_pmap) 4195 goto out; 4196 break; 4197 } 4198 if (field == _NPCM) { 4199 PV_STAT(pv_entry_spare -= _NPCPV); 4200 PV_STAT(pc_chunk_count--); 4201 PV_STAT(pc_chunk_frees++); 4202 /* Entire chunk is free; return it. */ 4203 m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); 4204 pmap_qremove((vm_offset_t)pc, 1); 4205 pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); 4206 break; 4207 } 4208 } 4209 out: 4210 TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru); 4211 if (pmap != NULL) { 4212 cpu_tlb_flushID(); 4213 cpu_cpwait(); 4214 if (pmap != locked_pmap) 4215 PMAP_UNLOCK(pmap); 4216 } 4217 return (m_pc); 4218 } 4219 4220 /* 4221 * free the pv_entry back to the free list 4222 */ 4223 static void 4224 pmap_free_pv_entry(pmap_t pmap, pv_entry_t pv) 4225 { 4226 struct pv_chunk *pc; 4227 int bit, field, idx; 4228 4229 rw_assert(&pvh_global_lock, RA_WLOCKED); 4230 PMAP_ASSERT_LOCKED(pmap); 4231 PV_STAT(pv_entry_frees++); 4232 PV_STAT(pv_entry_spare++); 4233 pv_entry_count--; 4234 pc = pv_to_chunk(pv); 4235 idx = pv - &pc->pc_pventry[0]; 4236 field = idx / (sizeof(u_long) * NBBY); 4237 bit = idx % (sizeof(u_long) * NBBY); 4238 pc->pc_map[field] |= 1ul << bit; 4239 for (idx = 0; idx < _NPCM; idx++) 4240 if (pc->pc_map[idx] != pc_freemask[idx]) { 4241 /* 4242 * 98% of the time, pc is already at the head of the 4243 * list. If it isn't already, move it to the head. 
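 * Keeping chunks that still have free entries at the head allows
 * pmap_get_pv_entry() to find a spare slot with a single
 * TAILQ_FIRST() lookup.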
4244 */ 4245 if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) != 4246 pc)) { 4247 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 4248 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, 4249 pc_list); 4250 } 4251 return; 4252 } 4253 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 4254 pmap_free_pv_chunk(pc); 4255 } 4256 4257 static void 4258 pmap_free_pv_chunk(struct pv_chunk *pc) 4259 { 4260 vm_page_t m; 4261 4262 TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 4263 PV_STAT(pv_entry_spare -= _NPCPV); 4264 PV_STAT(pc_chunk_count--); 4265 PV_STAT(pc_chunk_frees++); 4266 /* entire chunk is free, return it */ 4267 m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); 4268 pmap_qremove((vm_offset_t)pc, 1); 4269 vm_page_unwire(m, PQ_INACTIVE); 4270 vm_page_free(m); 4271 pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); 4272 4273 } 4274 4275 static pv_entry_t 4276 pmap_get_pv_entry(pmap_t pmap, boolean_t try) 4277 { 4278 static const struct timeval printinterval = { 60, 0 }; 4279 static struct timeval lastprint; 4280 struct pv_chunk *pc; 4281 pv_entry_t pv; 4282 vm_page_t m; 4283 int bit, field, idx; 4284 4285 rw_assert(&pvh_global_lock, RA_WLOCKED); 4286 PMAP_ASSERT_LOCKED(pmap); 4287 PV_STAT(pv_entry_allocs++); 4288 pv_entry_count++; 4289 4290 if (pv_entry_count > pv_entry_high_water) 4291 if (ratecheck(&lastprint, &printinterval)) 4292 printf("%s: Approaching the limit on PV entries.\n", 4293 __func__); 4294 retry: 4295 pc = TAILQ_FIRST(&pmap->pm_pvchunk); 4296 if (pc != NULL) { 4297 for (field = 0; field < _NPCM; field++) { 4298 if (pc->pc_map[field]) { 4299 bit = ffs(pc->pc_map[field]) - 1; 4300 break; 4301 } 4302 } 4303 if (field < _NPCM) { 4304 idx = field * sizeof(pc->pc_map[field]) * NBBY + bit; 4305 pv = &pc->pc_pventry[idx]; 4306 pc->pc_map[field] &= ~(1ul << bit); 4307 /* If this was the last item, move it to tail */ 4308 for (field = 0; field < _NPCM; field++) 4309 if (pc->pc_map[field] != 0) { 4310 PV_STAT(pv_entry_spare--); 4311 return (pv); /* not full, return */ 4312 } 4313 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 4314 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); 4315 PV_STAT(pv_entry_spare--); 4316 return (pv); 4317 } 4318 } 4319 /* 4320 * Access to the ptelist "pv_vafree" is synchronized by the pvh 4321 * global lock. If "pv_vafree" is currently non-empty, it will 4322 * remain non-empty until pmap_ptelist_alloc() completes. 4323 */ 4324 if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 4325 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { 4326 if (try) { 4327 pv_entry_count--; 4328 PV_STAT(pc_chunk_tryfail++); 4329 return (NULL); 4330 } 4331 m = pmap_pv_reclaim(pmap); 4332 if (m == NULL) 4333 goto retry; 4334 } 4335 PV_STAT(pc_chunk_count++); 4336 PV_STAT(pc_chunk_allocs++); 4337 pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree); 4338 pmap_qenter((vm_offset_t)pc, &m, 1); 4339 pc->pc_pmap = pmap; 4340 pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */ 4341 for (field = 1; field < _NPCM; field++) 4342 pc->pc_map[field] = pc_freemask[field]; 4343 TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); 4344 pv = &pc->pc_pventry[0]; 4345 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 4346 PV_STAT(pv_entry_spare += _NPCPV - 1); 4347 return (pv); 4348 } 4349 4350 /* 4351 * Remove the given range of addresses from the specified map. 4352 * 4353 * It is assumed that the start and end are properly 4354 * rounded to the page size. 
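 *
 * The range is processed one l2_bucket at a time. A 1MB section
 * mapping that is fully covered by the range is removed outright;
 * a partially covered section is demoted to page mappings first.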
4355 */ 4356 #define PMAP_REMOVE_CLEAN_LIST_SIZE 3 4357 void 4358 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 4359 { 4360 struct l2_bucket *l2b; 4361 vm_offset_t next_bucket; 4362 pd_entry_t l1pd; 4363 pt_entry_t *ptep; 4364 u_int total; 4365 u_int mappings, is_exec, is_refd; 4366 int flushall = 0; 4367 4368 4369 /* 4370 * we lock in the pmap => pv_head direction 4371 */ 4372 4373 rw_wlock(&pvh_global_lock); 4374 PMAP_LOCK(pmap); 4375 total = 0; 4376 while (sva < eva) { 4377 next_bucket = L2_NEXT_BUCKET(sva); 4378 4379 /* 4380 * Check for large page. 4381 */ 4382 l1pd = pmap->pm_l1->l1_kva[L1_IDX(sva)]; 4383 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { 4384 KASSERT((l1pd & L1_S_DOM_MASK) != 4385 L1_S_DOM(PMAP_DOMAIN_KERNEL), ("pmap_remove: " 4386 "Trying to remove kernel section mapping")); 4387 /* 4388 * Are we removing the entire large page? If not, 4389 * demote the mapping and fall through. 4390 */ 4391 if (sva + L1_S_SIZE == next_bucket && 4392 eva >= next_bucket) { 4393 pmap_remove_section(pmap, sva); 4394 sva = next_bucket; 4395 continue; 4396 } else if (!pmap_demote_section(pmap, sva)) { 4397 /* The large page mapping was destroyed. */ 4398 sva = next_bucket; 4399 continue; 4400 } 4401 } 4402 /* 4403 * Do one L2 bucket's worth at a time. 4404 */ 4405 if (next_bucket > eva) 4406 next_bucket = eva; 4407 4408 l2b = pmap_get_l2_bucket(pmap, sva); 4409 if (l2b == NULL) { 4410 sva = next_bucket; 4411 continue; 4412 } 4413 4414 ptep = &l2b->l2b_kva[l2pte_index(sva)]; 4415 mappings = 0; 4416 4417 while (sva < next_bucket) { 4418 struct vm_page *m; 4419 pt_entry_t pte; 4420 vm_paddr_t pa; 4421 4422 pte = *ptep; 4423 4424 if (pte == 0) { 4425 /* 4426 * Nothing here, move along 4427 */ 4428 sva += PAGE_SIZE; 4429 ptep++; 4430 continue; 4431 } 4432 4433 pmap->pm_stats.resident_count--; 4434 pa = l2pte_pa(pte); 4435 is_exec = 0; 4436 is_refd = 1; 4437 4438 /* 4439 * Update flags. In a number of circumstances, 4440 * we could cluster a lot of these and do a 4441 * number of sequential pages in one go. 4442 */ 4443 if ((m = PHYS_TO_VM_PAGE(pa)) != NULL) { 4444 struct pv_entry *pve; 4445 4446 pve = pmap_remove_pv(m, pmap, sva); 4447 if (pve) { 4448 is_exec = PTE_BEEN_EXECD(pte); 4449 is_refd = PTE_BEEN_REFD(pte); 4450 pmap_free_pv_entry(pmap, pve); 4451 } 4452 } 4453 4454 *ptep = 0; 4455 PTE_SYNC(ptep); 4456 if (pmap_is_current(pmap)) { 4457 total++; 4458 if (total < PMAP_REMOVE_CLEAN_LIST_SIZE) { 4459 if (is_exec) 4460 cpu_tlb_flushID_SE(sva); 4461 else if (is_refd) 4462 cpu_tlb_flushD_SE(sva); 4463 } else if (total == PMAP_REMOVE_CLEAN_LIST_SIZE) 4464 flushall = 1; 4465 } 4466 4467 sva += PAGE_SIZE; 4468 ptep++; 4469 mappings++; 4470 } 4471 4472 pmap_free_l2_bucket(pmap, l2b, mappings); 4473 } 4474 4475 rw_wunlock(&pvh_global_lock); 4476 if (flushall) 4477 cpu_tlb_flushID(); 4478 cpu_cpwait(); 4479 4480 PMAP_UNLOCK(pmap); 4481 } 4482 4483 /* 4484 * pmap_zero_page() 4485 * 4486 * Zero a given physical page by mapping it at a page hook point. 4487 * In doing the zero page op, the page we zero is mapped cachable, as with 4488 * StrongARM accesses to non-cached pages are non-burst making writing 4489 * _any_ bulk data very slow. 
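 *
 * The destination hook mapping comes from the per-CPU cpu_czpages
 * slot; sched_pin() keeps the thread on that CPU and czp->lock
 * serializes access to the slot.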
4490 */ 4491 static void 4492 pmap_zero_page_gen(vm_page_t m, int off, int size) 4493 { 4494 struct czpages *czp; 4495 4496 KASSERT(TAILQ_EMPTY(&m->md.pv_list), 4497 ("pmap_zero_page_gen: page has mappings")); 4498 4499 vm_paddr_t phys = VM_PAGE_TO_PHYS(m); 4500 4501 sched_pin(); 4502 czp = &cpu_czpages[PCPU_GET(cpuid)]; 4503 mtx_lock(&czp->lock); 4504 4505 /* 4506 * Hook in the page, zero it. 4507 */ 4508 *czp->dstptep = L2_S_PROTO | phys | pte_l2_s_cache_mode | L2_S_REF; 4509 pmap_set_prot(czp->dstptep, VM_PROT_WRITE, 0); 4510 PTE_SYNC(czp->dstptep); 4511 cpu_tlb_flushD_SE(czp->dstva); 4512 cpu_cpwait(); 4513 4514 if (off || size != PAGE_SIZE) 4515 bzero((void *)(czp->dstva + off), size); 4516 else 4517 bzero_page(czp->dstva); 4518 4519 /* 4520 * Although aliasing is not possible, if we use temporary mappings with 4521 * memory that will be mapped later as non-cached or with write-through 4522 * caches, we might end up overwriting it when calling wbinv_all. So 4523 * make sure caches are clean after the operation. 4524 */ 4525 cpu_idcache_wbinv_range(czp->dstva, size); 4526 pmap_l2cache_wbinv_range(czp->dstva, phys, size); 4527 4528 mtx_unlock(&czp->lock); 4529 sched_unpin(); 4530 } 4531 4532 /* 4533 * pmap_zero_page zeros the specified hardware page by mapping 4534 * the page into KVM and using bzero to clear its contents. 4535 */ 4536 void 4537 pmap_zero_page(vm_page_t m) 4538 { 4539 pmap_zero_page_gen(m, 0, PAGE_SIZE); 4540 } 4541 4542 4543 /* 4544 * pmap_zero_page_area zeros the specified hardware page by mapping 4545 * the page into KVM and using bzero to clear its contents. 4546 * 4547 * off and size may not cover an area beyond a single hardware page. 4548 */ 4549 void 4550 pmap_zero_page_area(vm_page_t m, int off, int size) 4551 { 4552 4553 pmap_zero_page_gen(m, off, size); 4554 } 4555 4556 4557 /* 4558 * pmap_zero_page_idle zeros the specified hardware page by mapping 4559 * the page into KVM and using bzero to clear its contents. This 4560 * is intended to be called from the vm_pagezero process only and 4561 * outside of Giant. 4562 */ 4563 void 4564 pmap_zero_page_idle(vm_page_t m) 4565 { 4566 4567 pmap_zero_page(m); 4568 } 4569 4570 /* 4571 * pmap_copy_page copies the specified (machine independent) 4572 * page by mapping the page into virtual memory and using 4573 * bcopy to copy the page, one machine dependent page at a 4574 * time. 4575 */ 4576 4577 /* 4578 * pmap_copy_page() 4579 * 4580 * Copy one physical page into another, by mapping the pages into 4581 * hook points. The same comment regarding cachability as in 4582 * pmap_zero_page also applies here. 4583 */ 4584 void 4585 pmap_copy_page_generic(vm_paddr_t src, vm_paddr_t dst) 4586 { 4587 struct czpages *czp; 4588 4589 sched_pin(); 4590 czp = &cpu_czpages[PCPU_GET(cpuid)]; 4591 mtx_lock(&czp->lock); 4592 4593 /* 4594 * Map the pages into the page hook points, copy them, and purge the 4595 * cache for the appropriate page. 
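 * The source hook is mapped read-only and the destination hook
 * read/write; both mappings are flushed from the TLB before the copy.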
4596 */ 4597 *czp->srcptep = L2_S_PROTO | src | pte_l2_s_cache_mode | L2_S_REF; 4598 pmap_set_prot(czp->srcptep, VM_PROT_READ, 0); 4599 PTE_SYNC(czp->srcptep); 4600 cpu_tlb_flushD_SE(czp->srcva); 4601 *czp->dstptep = L2_S_PROTO | dst | pte_l2_s_cache_mode | L2_S_REF; 4602 pmap_set_prot(czp->dstptep, VM_PROT_READ | VM_PROT_WRITE, 0); 4603 PTE_SYNC(czp->dstptep); 4604 cpu_tlb_flushD_SE(czp->dstva); 4605 cpu_cpwait(); 4606 4607 bcopy_page(czp->srcva, czp->dstva); 4608 4609 /* 4610 * Although aliasing is not possible, if we use temporary mappings with 4611 * memory that will be mapped later as non-cached or with write-through 4612 * caches, we might end up overwriting it when calling wbinv_all. So 4613 * make sure caches are clean after the operation. 4614 */ 4615 cpu_idcache_wbinv_range(czp->dstva, PAGE_SIZE); 4616 pmap_l2cache_wbinv_range(czp->dstva, dst, PAGE_SIZE); 4617 4618 mtx_unlock(&czp->lock); 4619 sched_unpin(); 4620 } 4621 4622 int unmapped_buf_allowed = 1; 4623 4624 void 4625 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], 4626 vm_offset_t b_offset, int xfersize) 4627 { 4628 vm_page_t a_pg, b_pg; 4629 vm_offset_t a_pg_offset, b_pg_offset; 4630 int cnt; 4631 struct czpages *czp; 4632 4633 sched_pin(); 4634 czp = &cpu_czpages[PCPU_GET(cpuid)]; 4635 mtx_lock(&czp->lock); 4636 4637 while (xfersize > 0) { 4638 a_pg = ma[a_offset >> PAGE_SHIFT]; 4639 a_pg_offset = a_offset & PAGE_MASK; 4640 cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 4641 b_pg = mb[b_offset >> PAGE_SHIFT]; 4642 b_pg_offset = b_offset & PAGE_MASK; 4643 cnt = min(cnt, PAGE_SIZE - b_pg_offset); 4644 *czp->srcptep = L2_S_PROTO | VM_PAGE_TO_PHYS(a_pg) | 4645 pte_l2_s_cache_mode | L2_S_REF; 4646 pmap_set_prot(czp->srcptep, VM_PROT_READ, 0); 4647 PTE_SYNC(czp->srcptep); 4648 cpu_tlb_flushD_SE(czp->srcva); 4649 *czp->dstptep = L2_S_PROTO | VM_PAGE_TO_PHYS(b_pg) | 4650 pte_l2_s_cache_mode | L2_S_REF; 4651 pmap_set_prot(czp->dstptep, VM_PROT_READ | VM_PROT_WRITE, 0); 4652 PTE_SYNC(czp->dstptep); 4653 cpu_tlb_flushD_SE(czp->dstva); 4654 cpu_cpwait(); 4655 bcopy((char *)czp->srcva + a_pg_offset, (char *)czp->dstva + b_pg_offset, 4656 cnt); 4657 cpu_idcache_wbinv_range(czp->dstva + b_pg_offset, cnt); 4658 pmap_l2cache_wbinv_range(czp->dstva + b_pg_offset, 4659 VM_PAGE_TO_PHYS(b_pg) + b_pg_offset, cnt); 4660 xfersize -= cnt; 4661 a_offset += cnt; 4662 b_offset += cnt; 4663 } 4664 4665 mtx_unlock(&czp->lock); 4666 sched_unpin(); 4667 } 4668 4669 void 4670 pmap_copy_page(vm_page_t src, vm_page_t dst) 4671 { 4672 4673 if (_arm_memcpy && PAGE_SIZE >= _min_memcpy_size && 4674 _arm_memcpy((void *)VM_PAGE_TO_PHYS(dst), 4675 (void *)VM_PAGE_TO_PHYS(src), PAGE_SIZE, IS_PHYSICAL) == 0) 4676 return; 4677 4678 pmap_copy_page_generic(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst)); 4679 } 4680 4681 /* 4682 * this routine returns true if a physical page resides 4683 * in the given pmap. 
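 * To keep the check cheap, at most 16 pv entries are inspected,
 * counting those on the page itself and, for non-fictitious pages,
 * those on its superpage's pv list.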
4684 */ 4685 boolean_t 4686 pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 4687 { 4688 struct md_page *pvh; 4689 pv_entry_t pv; 4690 int loops = 0; 4691 boolean_t rv; 4692 4693 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4694 ("pmap_page_exists_quick: page %p is not managed", m)); 4695 rv = FALSE; 4696 rw_wlock(&pvh_global_lock); 4697 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 4698 if (PV_PMAP(pv) == pmap) { 4699 rv = TRUE; 4700 break; 4701 } 4702 loops++; 4703 if (loops >= 16) 4704 break; 4705 } 4706 if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { 4707 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 4708 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 4709 if (PV_PMAP(pv) == pmap) { 4710 rv = TRUE; 4711 break; 4712 } 4713 loops++; 4714 if (loops >= 16) 4715 break; 4716 } 4717 } 4718 rw_wunlock(&pvh_global_lock); 4719 return (rv); 4720 } 4721 4722 /* 4723 * pmap_page_wired_mappings: 4724 * 4725 * Return the number of managed mappings to the given physical page 4726 * that are wired. 4727 */ 4728 int 4729 pmap_page_wired_mappings(vm_page_t m) 4730 { 4731 int count; 4732 4733 count = 0; 4734 if ((m->oflags & VPO_UNMANAGED) != 0) 4735 return (count); 4736 rw_wlock(&pvh_global_lock); 4737 count = pmap_pvh_wired_mappings(&m->md, count); 4738 if ((m->flags & PG_FICTITIOUS) == 0) { 4739 count = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)), 4740 count); 4741 } 4742 rw_wunlock(&pvh_global_lock); 4743 return (count); 4744 } 4745 4746 /* 4747 * pmap_pvh_wired_mappings: 4748 * 4749 * Return the updated number "count" of managed mappings that are wired. 4750 */ 4751 static int 4752 pmap_pvh_wired_mappings(struct md_page *pvh, int count) 4753 { 4754 pv_entry_t pv; 4755 4756 rw_assert(&pvh_global_lock, RA_WLOCKED); 4757 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 4758 if ((pv->pv_flags & PVF_WIRED) != 0) 4759 count++; 4760 } 4761 return (count); 4762 } 4763 4764 /* 4765 * Returns TRUE if any of the given mappings were referenced and FALSE 4766 * otherwise. Both page and section mappings are supported. 4767 */ 4768 static boolean_t 4769 pmap_is_referenced_pvh(struct md_page *pvh) 4770 { 4771 struct l2_bucket *l2b; 4772 pv_entry_t pv; 4773 pd_entry_t *pl1pd; 4774 pt_entry_t *ptep; 4775 pmap_t pmap; 4776 boolean_t rv; 4777 4778 rw_assert(&pvh_global_lock, RA_WLOCKED); 4779 rv = FALSE; 4780 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 4781 pmap = PV_PMAP(pv); 4782 PMAP_LOCK(pmap); 4783 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(pv->pv_va)]; 4784 if ((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO) 4785 rv = L1_S_REFERENCED(*pl1pd); 4786 else { 4787 l2b = pmap_get_l2_bucket(pmap, pv->pv_va); 4788 ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)]; 4789 rv = L2_S_REFERENCED(*ptep); 4790 } 4791 PMAP_UNLOCK(pmap); 4792 if (rv) 4793 break; 4794 } 4795 return (rv); 4796 } 4797 4798 /* 4799 * pmap_is_referenced: 4800 * 4801 * Return whether or not the specified physical page was referenced 4802 * in any physical maps. 4803 */ 4804 boolean_t 4805 pmap_is_referenced(vm_page_t m) 4806 { 4807 boolean_t rv; 4808 4809 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4810 ("pmap_is_referenced: page %p is not managed", m)); 4811 rw_wlock(&pvh_global_lock); 4812 rv = pmap_is_referenced_pvh(&m->md) || 4813 ((m->flags & PG_FICTITIOUS) == 0 && 4814 pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); 4815 rw_wunlock(&pvh_global_lock); 4816 return (rv); 4817 } 4818 4819 /* 4820 * pmap_ts_referenced: 4821 * 4822 * Return the count of reference bits for a page, clearing all of them. 
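 *
 * The work is done by pmap_clearbit(m, PVF_REF) below, which clears
 * PVF_REF on every mapping of the page and returns the resulting count.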
4823 */ 4824 int 4825 pmap_ts_referenced(vm_page_t m) 4826 { 4827 4828 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4829 ("pmap_ts_referenced: page %p is not managed", m)); 4830 return (pmap_clearbit(m, PVF_REF)); 4831 } 4832 4833 /* 4834 * Returns TRUE if any of the given mappings were used to modify 4835 * physical memory. Otherwise, returns FALSE. Both page and 1MB section 4836 * mappings are supported. 4837 */ 4838 static boolean_t 4839 pmap_is_modified_pvh(struct md_page *pvh) 4840 { 4841 pd_entry_t *pl1pd; 4842 struct l2_bucket *l2b; 4843 pv_entry_t pv; 4844 pt_entry_t *ptep; 4845 pmap_t pmap; 4846 boolean_t rv; 4847 4848 rw_assert(&pvh_global_lock, RA_WLOCKED); 4849 rv = FALSE; 4850 4851 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 4852 pmap = PV_PMAP(pv); 4853 PMAP_LOCK(pmap); 4854 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(pv->pv_va)]; 4855 if ((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO) 4856 rv = L1_S_WRITABLE(*pl1pd); 4857 else { 4858 l2b = pmap_get_l2_bucket(pmap, pv->pv_va); 4859 ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)]; 4860 rv = L2_S_WRITABLE(*ptep); 4861 } 4862 PMAP_UNLOCK(pmap); 4863 if (rv) 4864 break; 4865 } 4866 4867 return (rv); 4868 } 4869 4870 boolean_t 4871 pmap_is_modified(vm_page_t m) 4872 { 4873 boolean_t rv; 4874 4875 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4876 ("pmap_is_modified: page %p is not managed", m)); 4877 /* 4878 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 4879 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 4880 * is clear, no PTEs can have APX cleared. 4881 */ 4882 VM_OBJECT_ASSERT_WLOCKED(m->object); 4883 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 4884 return (FALSE); 4885 rw_wlock(&pvh_global_lock); 4886 rv = pmap_is_modified_pvh(&m->md) || 4887 ((m->flags & PG_FICTITIOUS) == 0 && 4888 pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); 4889 rw_wunlock(&pvh_global_lock); 4890 return (rv); 4891 } 4892 4893 /* 4894 * Apply the given advice to the specified range of addresses within the 4895 * given pmap. Depending on the advice, clear the referenced and/or 4896 * modified flags in each mapping. 4897 */ 4898 void 4899 pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) 4900 { 4901 struct l2_bucket *l2b; 4902 struct pv_entry *pve; 4903 pd_entry_t l1pd; 4904 pt_entry_t *ptep, opte, pte; 4905 vm_offset_t next_bucket; 4906 vm_page_t m; 4907 4908 if (advice != MADV_DONTNEED && advice != MADV_FREE) 4909 return; 4910 rw_wlock(&pvh_global_lock); 4911 PMAP_LOCK(pmap); 4912 for (; sva < eva; sva = next_bucket) { 4913 next_bucket = L2_NEXT_BUCKET(sva); 4914 if (next_bucket < sva) 4915 next_bucket = eva; 4916 l1pd = pmap->pm_l1->l1_kva[L1_IDX(sva)]; 4917 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { 4918 if (pmap == pmap_kernel()) 4919 continue; 4920 if (!pmap_demote_section(pmap, sva)) { 4921 /* 4922 * The large page mapping was destroyed. 4923 */ 4924 continue; 4925 } 4926 /* 4927 * Unless the page mappings are wired, remove the 4928 * mapping to a single page so that a subsequent 4929 * access may repromote. Since the underlying 4930 * l2_bucket is fully populated, this removal 4931 * never frees an entire l2_bucket. 
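 * Only the single 4KB mapping covering sva is removed below; the
 * other base-page mappings created by the demotion remain valid.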
4932 */ 4933 l2b = pmap_get_l2_bucket(pmap, sva); 4934 KASSERT(l2b != NULL, 4935 ("pmap_advise: no l2 bucket for " 4936 "va 0x%#x, pmap 0x%p", sva, pmap)); 4937 ptep = &l2b->l2b_kva[l2pte_index(sva)]; 4938 opte = *ptep; 4939 m = PHYS_TO_VM_PAGE(l2pte_pa(*ptep)); 4940 KASSERT(m != NULL, 4941 ("pmap_advise: no vm_page for demoted superpage")); 4942 pve = pmap_find_pv(&m->md, pmap, sva); 4943 KASSERT(pve != NULL, 4944 ("pmap_advise: no PV entry for managed mapping")); 4945 if ((pve->pv_flags & PVF_WIRED) == 0) { 4946 pmap_free_l2_bucket(pmap, l2b, 1); 4947 pve = pmap_remove_pv(m, pmap, sva); 4948 pmap_free_pv_entry(pmap, pve); 4949 *ptep = 0; 4950 PTE_SYNC(ptep); 4951 if (pmap_is_current(pmap)) { 4952 if (PTE_BEEN_EXECD(opte)) 4953 cpu_tlb_flushID_SE(sva); 4954 else if (PTE_BEEN_REFD(opte)) 4955 cpu_tlb_flushD_SE(sva); 4956 } 4957 } 4958 } 4959 if (next_bucket > eva) 4960 next_bucket = eva; 4961 l2b = pmap_get_l2_bucket(pmap, sva); 4962 if (l2b == NULL) 4963 continue; 4964 for (ptep = &l2b->l2b_kva[l2pte_index(sva)]; 4965 sva != next_bucket; ptep++, sva += PAGE_SIZE) { 4966 opte = pte = *ptep; 4967 if ((opte & L2_S_PROTO) == 0) 4968 continue; 4969 m = PHYS_TO_VM_PAGE(l2pte_pa(opte)); 4970 if (m == NULL || (m->oflags & VPO_UNMANAGED) != 0) 4971 continue; 4972 else if (L2_S_WRITABLE(opte)) { 4973 if (advice == MADV_DONTNEED) { 4974 /* 4975 * Don't need to mark the page 4976 * dirty as it was already marked as 4977 * such in pmap_fault_fixup() or 4978 * pmap_enter_locked(). 4979 * Just clear the state. 4980 */ 4981 } else 4982 pte |= L2_APX; 4983 4984 pte &= ~L2_S_REF; 4985 *ptep = pte; 4986 PTE_SYNC(ptep); 4987 } else if (L2_S_REFERENCED(opte)) { 4988 pte &= ~L2_S_REF; 4989 *ptep = pte; 4990 PTE_SYNC(ptep); 4991 } else 4992 continue; 4993 if (pmap_is_current(pmap)) { 4994 if (PTE_BEEN_EXECD(opte)) 4995 cpu_tlb_flushID_SE(sva); 4996 else if (PTE_BEEN_REFD(opte)) 4997 cpu_tlb_flushD_SE(sva); 4998 } 4999 } 5000 } 5001 cpu_cpwait(); 5002 rw_wunlock(&pvh_global_lock); 5003 PMAP_UNLOCK(pmap); 5004 } 5005 5006 /* 5007 * Clear the modify bits on the specified physical page. 5008 */ 5009 void 5010 pmap_clear_modify(vm_page_t m) 5011 { 5012 5013 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 5014 ("pmap_clear_modify: page %p is not managed", m)); 5015 VM_OBJECT_ASSERT_WLOCKED(m->object); 5016 KASSERT(!vm_page_xbusied(m), 5017 ("pmap_clear_modify: page %p is exclusive busied", m)); 5018 5019 /* 5020 * If the page is not PGA_WRITEABLE, then no mappings can be modified. 5021 * If the object containing the page is locked and the page is not 5022 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. 5023 */ 5024 if ((m->aflags & PGA_WRITEABLE) == 0) 5025 return; 5026 if (pmap_is_modified(m)) 5027 pmap_clearbit(m, PVF_MOD); 5028 } 5029 5030 5031 /* 5032 * Clear the write and modified bits in each of the given page's mappings. 5033 */ 5034 void 5035 pmap_remove_write(vm_page_t m) 5036 { 5037 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 5038 ("pmap_remove_write: page %p is not managed", m)); 5039 5040 /* 5041 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 5042 * set by another thread while the object is locked. Thus, 5043 * if PGA_WRITEABLE is clear, no page table entries need updating. 
5044 */ 5045 VM_OBJECT_ASSERT_WLOCKED(m->object); 5046 if (vm_page_xbusied(m) || (m->aflags & PGA_WRITEABLE) != 0) 5047 pmap_clearbit(m, PVF_WRITE); 5048 } 5049 5050 5051 /* 5052 * perform the pmap work for mincore 5053 */ 5054 int 5055 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) 5056 { 5057 struct l2_bucket *l2b; 5058 pd_entry_t *pl1pd, l1pd; 5059 pt_entry_t *ptep, pte; 5060 vm_paddr_t pa; 5061 vm_page_t m; 5062 int val; 5063 boolean_t managed; 5064 5065 PMAP_LOCK(pmap); 5066 retry: 5067 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(addr)]; 5068 l1pd = *pl1pd; 5069 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { 5070 pa = (l1pd & L1_S_FRAME); 5071 val = MINCORE_SUPER | MINCORE_INCORE; 5072 if (L1_S_WRITABLE(l1pd)) 5073 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; 5074 managed = FALSE; 5075 m = PHYS_TO_VM_PAGE(pa); 5076 if (m != NULL && (m->oflags & VPO_UNMANAGED) == 0) 5077 managed = TRUE; 5078 if (managed) { 5079 if (L1_S_REFERENCED(l1pd)) 5080 val |= MINCORE_REFERENCED | 5081 MINCORE_REFERENCED_OTHER; 5082 } 5083 } else { 5084 l2b = pmap_get_l2_bucket(pmap, addr); 5085 if (l2b == NULL) { 5086 val = 0; 5087 goto out; 5088 } 5089 ptep = &l2b->l2b_kva[l2pte_index(addr)]; 5090 pte = *ptep; 5091 if (!l2pte_valid(pte)) { 5092 val = 0; 5093 goto out; 5094 } 5095 val = MINCORE_INCORE; 5096 if (L2_S_WRITABLE(pte)) 5097 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; 5098 managed = FALSE; 5099 pa = l2pte_pa(pte); 5100 m = PHYS_TO_VM_PAGE(pa); 5101 if (m != NULL && (m->oflags & VPO_UNMANAGED) == 0) 5102 managed = TRUE; 5103 if (managed) { 5104 if (L2_S_REFERENCED(pte)) 5105 val |= MINCORE_REFERENCED | 5106 MINCORE_REFERENCED_OTHER; 5107 } 5108 } 5109 if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != 5110 (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) { 5111 /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ 5112 if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) 5113 goto retry; 5114 } else 5115 out: 5116 PA_UNLOCK_COND(*locked_pa); 5117 PMAP_UNLOCK(pmap); 5118 return (val); 5119 } 5120 5121 void 5122 pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz) 5123 { 5124 } 5125 5126 /* 5127 * Increase the starting virtual address of the given mapping if a 5128 * different alignment might result in more superpage mappings. 5129 */ 5130 void 5131 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, 5132 vm_offset_t *addr, vm_size_t size) 5133 { 5134 vm_offset_t superpage_offset; 5135 5136 if (size < NBPDR) 5137 return; 5138 if (object != NULL && (object->flags & OBJ_COLORED) != 0) 5139 offset += ptoa(object->pg_color); 5140 superpage_offset = offset & PDRMASK; 5141 if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR || 5142 (*addr & PDRMASK) == superpage_offset) 5143 return; 5144 if ((*addr & PDRMASK) < superpage_offset) 5145 *addr = (*addr & ~PDRMASK) + superpage_offset; 5146 else 5147 *addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset; 5148 } 5149 5150 /* 5151 * pmap_map_section: 5152 * 5153 * Create a single section mapping. 
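 *
 * Both "va" and "pa" must be section-aligned (1MB-aligned); the
 * KASSERT below enforces this.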
5154 */ 5155 void 5156 pmap_map_section(pmap_t pmap, vm_offset_t va, vm_offset_t pa, vm_prot_t prot, 5157 boolean_t ref) 5158 { 5159 pd_entry_t *pl1pd, l1pd; 5160 pd_entry_t fl; 5161 5162 KASSERT(((va | pa) & L1_S_OFFSET) == 0, 5163 ("Not a valid section mapping")); 5164 5165 fl = pte_l1_s_cache_mode; 5166 5167 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; 5168 l1pd = L1_S_PROTO | pa | L1_S_PROT(PTE_USER, prot) | fl | 5169 L1_S_DOM(pmap->pm_domain); 5170 5171 /* Mark page referenced if this section is a result of a promotion. */ 5172 if (ref == TRUE) 5173 l1pd |= L1_S_REF; 5174 #ifdef SMP 5175 l1pd |= L1_SHARED; 5176 #endif 5177 *pl1pd = l1pd; 5178 PTE_SYNC(pl1pd); 5179 } 5180 5181 /* 5182 * pmap_link_l2pt: 5183 * 5184 * Link the L2 page table specified by l2pv.pv_pa into the L1 5185 * page table at the slot for "va". 5186 */ 5187 void 5188 pmap_link_l2pt(vm_offset_t l1pt, vm_offset_t va, struct pv_addr *l2pv) 5189 { 5190 pd_entry_t *pde = (pd_entry_t *) l1pt, proto; 5191 u_int slot = va >> L1_S_SHIFT; 5192 5193 proto = L1_S_DOM(PMAP_DOMAIN_KERNEL) | L1_C_PROTO; 5194 5195 #ifdef VERBOSE_INIT_ARM 5196 printf("pmap_link_l2pt: pa=0x%x va=0x%x\n", l2pv->pv_pa, l2pv->pv_va); 5197 #endif 5198 5199 pde[slot + 0] = proto | (l2pv->pv_pa + 0x000); 5200 PTE_SYNC(&pde[slot]); 5201 5202 SLIST_INSERT_HEAD(&kernel_pt_list, l2pv, pv_list); 5203 5204 } 5205 5206 /* 5207 * pmap_map_entry 5208 * 5209 * Create a single page mapping. 5210 */ 5211 void 5212 pmap_map_entry(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa, int prot, 5213 int cache) 5214 { 5215 pd_entry_t *pde = (pd_entry_t *) l1pt; 5216 pt_entry_t fl; 5217 pt_entry_t *ptep; 5218 5219 KASSERT(((va | pa) & PAGE_MASK) == 0, ("ouin")); 5220 5221 fl = l2s_mem_types[cache]; 5222 5223 if ((pde[va >> L1_S_SHIFT] & L1_TYPE_MASK) != L1_TYPE_C) 5224 panic("pmap_map_entry: no L2 table for VA 0x%08x", va); 5225 5226 ptep = (pt_entry_t *)kernel_pt_lookup(pde[L1_IDX(va)] & L1_C_ADDR_MASK); 5227 5228 if (ptep == NULL) 5229 panic("pmap_map_entry: can't find L2 table for VA 0x%08x", va); 5230 5231 ptep[l2pte_index(va)] = L2_S_PROTO | pa | fl | L2_S_REF; 5232 pmap_set_prot(&ptep[l2pte_index(va)], prot, 0); 5233 PTE_SYNC(&ptep[l2pte_index(va)]); 5234 } 5235 5236 /* 5237 * pmap_map_chunk: 5238 * 5239 * Map a chunk of memory using the most efficient mappings 5240 * possible (section. large page, small page) into the 5241 * provided L1 and L2 tables at the specified virtual address. 5242 */ 5243 vm_size_t 5244 pmap_map_chunk(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa, 5245 vm_size_t size, int prot, int type) 5246 { 5247 pd_entry_t *pde = (pd_entry_t *) l1pt; 5248 pt_entry_t *ptep, f1, f2s, f2l; 5249 vm_size_t resid; 5250 int i; 5251 5252 resid = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1); 5253 5254 if (l1pt == 0) 5255 panic("pmap_map_chunk: no L1 table provided"); 5256 5257 #ifdef VERBOSE_INIT_ARM 5258 printf("pmap_map_chunk: pa=0x%x va=0x%x size=0x%x resid=0x%x " 5259 "prot=0x%x type=%d\n", pa, va, size, resid, prot, type); 5260 #endif 5261 5262 f1 = l1_mem_types[type]; 5263 f2l = l2l_mem_types[type]; 5264 f2s = l2s_mem_types[type]; 5265 5266 size = resid; 5267 5268 while (resid > 0) { 5269 /* See if we can use a section mapping. 
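A section is used when L1_S_MAPPABLE_P() is satisfied, i.e. both va and pa are section-aligned and at least L1_S_SIZE bytes remain to be mapped.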
*/ 5270 if (L1_S_MAPPABLE_P(va, pa, resid)) { 5271 #ifdef VERBOSE_INIT_ARM 5272 printf("S"); 5273 #endif 5274 pde[va >> L1_S_SHIFT] = L1_S_PROTO | pa | 5275 L1_S_PROT(PTE_KERNEL, prot | VM_PROT_EXECUTE) | 5276 f1 | L1_S_DOM(PMAP_DOMAIN_KERNEL) | L1_S_REF; 5277 PTE_SYNC(&pde[va >> L1_S_SHIFT]); 5278 va += L1_S_SIZE; 5279 pa += L1_S_SIZE; 5280 resid -= L1_S_SIZE; 5281 continue; 5282 } 5283 5284 /* 5285 * Ok, we're going to use an L2 table. Make sure 5286 * one is actually in the corresponding L1 slot 5287 * for the current VA. 5288 */ 5289 if ((pde[va >> L1_S_SHIFT] & L1_TYPE_MASK) != L1_TYPE_C) 5290 panic("pmap_map_chunk: no L2 table for VA 0x%08x", va); 5291 5292 ptep = (pt_entry_t *) kernel_pt_lookup( 5293 pde[L1_IDX(va)] & L1_C_ADDR_MASK); 5294 if (ptep == NULL) 5295 panic("pmap_map_chunk: can't find L2 table for VA" 5296 "0x%08x", va); 5297 /* See if we can use a L2 large page mapping. */ 5298 if (L2_L_MAPPABLE_P(va, pa, resid)) { 5299 #ifdef VERBOSE_INIT_ARM 5300 printf("L"); 5301 #endif 5302 for (i = 0; i < 16; i++) { 5303 ptep[l2pte_index(va) + i] = 5304 L2_L_PROTO | pa | 5305 L2_L_PROT(PTE_KERNEL, prot) | f2l; 5306 PTE_SYNC(&ptep[l2pte_index(va) + i]); 5307 } 5308 va += L2_L_SIZE; 5309 pa += L2_L_SIZE; 5310 resid -= L2_L_SIZE; 5311 continue; 5312 } 5313 5314 /* Use a small page mapping. */ 5315 #ifdef VERBOSE_INIT_ARM 5316 printf("P"); 5317 #endif 5318 ptep[l2pte_index(va)] = L2_S_PROTO | pa | f2s | L2_S_REF; 5319 pmap_set_prot(&ptep[l2pte_index(va)], prot, 0); 5320 PTE_SYNC(&ptep[l2pte_index(va)]); 5321 va += PAGE_SIZE; 5322 pa += PAGE_SIZE; 5323 resid -= PAGE_SIZE; 5324 } 5325 #ifdef VERBOSE_INIT_ARM 5326 printf("\n"); 5327 #endif 5328 return (size); 5329 5330 } 5331 5332 int 5333 pmap_dmap_iscurrent(pmap_t pmap) 5334 { 5335 return(pmap_is_current(pmap)); 5336 } 5337 5338 void 5339 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) 5340 { 5341 /* 5342 * Remember the memattr in a field that gets used to set the appropriate 5343 * bits in the PTEs as mappings are established. 5344 */ 5345 m->md.pv_memattr = ma; 5346 5347 /* 5348 * It appears that this function can only be called before any mappings 5349 * for the page are established on ARM. If this ever changes, this code 5350 * will need to walk the pv_list and make each of the existing mappings 5351 * uncacheable, being careful to sync caches and PTEs (and maybe 5352 * invalidate TLB?) for any current mapping it modifies. 5353 */ 5354 if (TAILQ_FIRST(&m->md.pv_list) != NULL) 5355 panic("Can't change memattr on page with existing mappings"); 5356 } 5357