/*
 * Copyright (c) 2003-2016 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * pmap invalidation support code.  Certain hardware requirements must
 * be dealt with when manipulating page table entries and page directory
 * entries within a pmap.  In particular, we cannot safely manipulate
 * page tables which are in active use by another cpu (even if it is
 * running in userland) for two reasons: First, TLB writebacks will
 * race against our own modifications and tests.  Second, even if we
 * were to use bus-locked instructions we could still screw up the
 * target cpu's instruction pipeline due to Intel cpu errata.
 *
 * For our virtual page tables, the real kernel will handle SMP interactions
 * with pmaps that may be active on other cpus.  Even so, we have to be
 * careful about bit setting races, particularly when we are trying to clean
 * a page and test the modified bit, to avoid races where the modified bit
 * might get set after our poll but before we clear the field.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/thread2.h>
#include <sys/cdefs.h>
#include <sys/mman.h>
#include <sys/vmspace.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_object.h>

#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <machine/smp.h>
#include <machine/globaldata.h>
#include <machine/pmap.h>
#include <machine/pmap_inval.h>

#include <unistd.h>

#include <vm/vm_page2.h>

/*
 * Invalidate va in the TLB on the current cpu
 */
static __inline
void
pmap_inval_cpu(struct pmap *pmap, vm_offset_t va, size_t bytes)
{
	if (pmap == kernel_pmap) {
		madvise((void *)va, bytes, MADV_INVAL);
	} else {
		vmspace_mcontrol(pmap, (void *)va, bytes, MADV_INVAL, 0);
	}
}

/*
 * Invalidate a pte in a pmap and synchronize with target cpus
 * as required.  Throw away the modified and access bits.  Use
 * pmap_clean_pte() to do the same thing but also get an interlocked
 * modified/access status.
 *
 * Clearing the field first (basically clearing VPTE_V) prevents any
 * new races from occurring while we invalidate the TLB (i.e. the pmap
 * on the real cpu), then clear it again to clean out any race that
 * might have occurred before the invalidation completed.
 */
void
pmap_inval_pte(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
	atomic_swap_long(ptep, 0);
	pmap_inval_cpu(pmap, va, PAGE_SIZE);
}

/*
 * Same as pmap_inval_pte() but only synchronize with the current
 * cpu.  For the moment it's the same as the non-quick version.
 */
void
pmap_inval_pte_quick(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
	atomic_swap_long(ptep, 0);
	pmap_inval_cpu(pmap, va, PAGE_SIZE);
}

/*
 * Invalidate the tlb for a range of virtual addresses across all cpus
 * belonging to the pmap.
 */
void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	pmap_inval_cpu(pmap, sva, eva - sva);
}

/*
 * Invalidating page directory entries requires some additional
 * sophistication.  The cachemask must be cleared so the kernel
 * resynchronizes its temporary page table mappings cache.
 */
void
pmap_inval_pde(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
	atomic_swap_long(ptep, 0);
	pmap_inval_cpu(pmap, va, SEG_SIZE);
}

void
pmap_inval_pde_quick(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
	pmap_inval_pde(ptep, pmap, va);
}
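
/*
 * Illustrative sketch only, compiled out: the cmpset retry pattern this
 * file uses to interlock pte bit changes against other cpus and against
 * the real kernel (which may set VPTE_M/VPTE_A from a fault at any time).
 * The loop re-reads the pte and retries until the compare-and-set wins,
 * so a concurrent bit update is never silently lost.  The helper name is
 * hypothetical and not part of the vkernel.
 */
#if 0
static vpte_t
example_clear_bits_interlocked(volatile vpte_t *ptep, vpte_t bits)
{
	vpte_t pte;

	for (;;) {
		pte = *ptep;
		cpu_ccfence();		/* force a re-read on every pass */
		if (atomic_cmpset_long(ptep, pte, pte & ~bits))
			break;
		/* lost the race; another cpu changed the pte, retry */
	}
	return pte;	/* the bits as they were before our clear */
}
#endif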

/*
 * This is really nasty.
 *
 * (1) The vkernel interlocks pte operations with the related vm_page_t
 *     spin-lock (and doesn't handle unmanaged page races).
 *
 * (2) The vkernel must also issue an invalidation to the real cpu.  It
 *     (nastily) does this while holding the spin-lock too.
 *
 * In addition, atomic ops must be used to properly interlock against
 * other cpus and the real kernel (which could be taking a fault on another
 * cpu and will adjust VPTE_M and VPTE_A appropriately).
 *
 * The atomic ops do a good job of interlocking against other cpus, but
 * we still need to lock the pte location (which we use the vm_page spin-lock
 * for) to avoid races against PG_WRITEABLE and other tests.
 *
 * Cleaning the pte involves clearing VPTE_M and VPTE_RW, synchronizing with
 * the real host, and updating the vm_page appropriately.
 *
 * If the caller passes a non-NULL (m), the caller holds the spin-lock,
 * otherwise we must acquire and release the spin-lock.  (m) is only
 * applicable to managed pages.
 */
vpte_t
pmap_clean_pte(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va,
	       vm_page_t m)
{
	vpte_t pte;
	int spin = 0;

	/*
	 * Acquire (m) and spin-lock it.
	 */
	while (m == NULL) {
		pte = *ptep;
		if ((pte & VPTE_V) == 0)
			return pte;
		if ((pte & VPTE_MANAGED) == 0)
			break;
		m = PHYS_TO_VM_PAGE(pte & VPTE_FRAME);
		vm_page_spin_lock(m);

		pte = *ptep;
		if ((pte & VPTE_V) == 0) {
			vm_page_spin_unlock(m);
			m = NULL;
			continue;
		}
		if ((pte & VPTE_MANAGED) == 0) {
			vm_page_spin_unlock(m);
			m = NULL;
			continue;
		}
		if (m != PHYS_TO_VM_PAGE(pte & VPTE_FRAME)) {
			vm_page_spin_unlock(m);
			m = NULL;
			continue;
		}
		spin = 1;
		break;
	}

	for (;;) {
		pte = *ptep;
		cpu_ccfence();
		if ((pte & VPTE_RW) == 0)
			break;
		if (atomic_cmpset_long(ptep, pte, pte & ~(VPTE_RW | VPTE_M))) {
			pmap_inval_cpu(pmap, va, PAGE_SIZE);
			break;
		}
	}

	if (m) {
		if (pte & VPTE_A) {
			vm_page_flag_set(m, PG_REFERENCED);
			atomic_clear_long(ptep, VPTE_A);
		}
		if (pte & VPTE_M) {
			vm_page_dirty(m);
		}
		if (spin)
			vm_page_spin_unlock(m);
	}
	return pte;
}

/*
 * This is a combination of pmap_inval_pte() and pmap_clean_pte().
 * First prevent races with the 'A' and 'M' bits, then clean out
 * the tlb (the real cpu's pmap), then incorporate any races that
 * may have occurred in the mean time, and finally zero out the pte.
 */
vpte_t
pmap_inval_loadandclear(volatile vpte_t *ptep, struct pmap *pmap,
			vm_offset_t va)
{
	vpte_t pte;

	pte = atomic_swap_long(ptep, 0);
	pmap_inval_cpu(pmap, va, PAGE_SIZE);
	return(pte);
}

void
cpu_invlpg(void *addr)
{
	madvise(addr, PAGE_SIZE, MADV_INVAL);
}

void
cpu_invltlb(void)
{
	madvise((void *)KvaStart, KvaEnd - KvaStart, MADV_INVAL);
}

/*
 * Invalidate the TLB on all cpus.  The vkernel does nothing here;
 * instead it ignores VM_PROT_NOSYNC on pmap_enter() calls.
 */
void
smp_invltlb(void)
{
	/* do nothing */
}

void
smp_sniff(void)
{
	/* not implemented */
}

void
cpu_sniff(int dcpu __unused)
{
	/* not implemented */
}
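
/*
 * Illustrative sketch only, compiled out: how a hypothetical caller might
 * consume pmap_clean_pte().  Passing m == NULL lets pmap_clean_pte() look
 * up and spin-lock the vm_page itself for managed ptes; the returned
 * pre-clean pte then tells the caller what state the page was in.  The
 * helper name is hypothetical and not part of the vkernel.
 */
#if 0
static int
example_was_page_dirty(volatile vpte_t *ptep, struct pmap *pmap,
		       vm_offset_t va)
{
	vpte_t pte;

	/*
	 * Clears VPTE_RW and VPTE_M, invalidates the real cpu's TLB, and
	 * (for managed pages) already applies PG_REFERENCED/vm_page_dirty()
	 * as appropriate.
	 */
	pte = pmap_clean_pte(ptep, pmap, va, NULL);

	/* Nonzero if the page had been modified before we cleaned it. */
	return ((pte & VPTE_M) != 0);
}
#endif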