/*
 * Copyright (c) 2003-2016 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/platform/vkernel/platform/pmap_inval.c,v 1.4 2007/07/02 02:22:58 dillon Exp $
 */

/*
 * pmap invalidation support code.  Certain hardware requirements must
 * be dealt with when manipulating page table entries and page directory
 * entries within a pmap.  In particular, we cannot safely manipulate
 * page tables which are in active use by another cpu (even if it is
 * running in userland) for two reasons: First, TLB writebacks will
 * race against our own modifications and tests.  Second, even if we
 * were to use bus-locked instructions we could still screw up the
 * target cpu's instruction pipeline due to Intel cpu errata.
 *
 * For our virtual page tables, the real kernel will handle SMP interactions
 * with pmaps that may be active on other cpus.  Even so, we have to be
 * careful about bit setting races, particularly when we are trying to clean
 * a page and test the modified bit, to avoid races where the modified bit
 * might get set after our poll but before we clear the field.
 */
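/*
 * Illustrative sketch only (kept under #if 0, never compiled): the
 * clear/invalidate/recheck pattern described above, written out as a
 * hypothetical helper.  example_invalidate_tlb() is a stand-in name;
 * the real code in this file uses pmap_inval_cpu() or guest_sync_addr().
 */
#if 0
static vpte_t
example_clear_invalidate_recheck(volatile vpte_t *ptep)
{
        vpte_t pte = *ptep;

        atomic_clear_long(ptep, VPTE_RW);       /* stop new VPTE_M stores */
        example_invalidate_tlb();               /* flush the real cpu's TLB */
        pte |= *ptep & VPTE_M;                  /* pick up any late VPTE_M */
        atomic_clear_long(ptep, VPTE_RW | VPTE_M);
        return pte;
}
#endif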
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/thread2.h>
#include <sys/cdefs.h>
#include <sys/mman.h>
#include <sys/vmspace.h>
#include <sys/vmm.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_object.h>

#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <machine/smp.h>
#include <machine/globaldata.h>
#include <machine/pmap.h>
#include <machine/pmap_inval.h>

#include <unistd.h>
#include <pthread.h>

extern int vmm_enabled;

/*
 * Invalidate the TLB on the current cpu
 *
 * (VMM enabled only)
 */
static __inline
void
vmm_cpu_invltlb(void)
{
        vmm_guest_sync_addr(NULL, NULL);
#if 0
        /* For VMM mode forces vmmexit/resume */
        uint64_t rax = -1;
        __asm __volatile("syscall;"
                        :
                        : "a" (rax)
                        :);
#endif
}

/*
 * Invalidate va in the TLB on the current cpu
 *
 * (VMM disabled only)
 */
static __inline
void
pmap_inval_cpu(struct pmap *pmap, vm_offset_t va, size_t bytes)
{
        if (pmap == &kernel_pmap) {
                madvise((void *)va, bytes, MADV_INVAL);
        } else {
                vmspace_mcontrol(pmap, (void *)va, bytes, MADV_INVAL, 0);
        }
}

/*
 * This is a bit of a mess because we don't know what virtual cpus are
 * mapped to real cpus.  Basically try to optimize the degenerate cases
 * (primarily related to user processes with only one thread or only one
 * running thread), and shunt all the rest to the host cpu.  The host cpu
 * will invalidate all real cpus the vkernel is running on.
 *
 * This can't optimize situations where a pmap is only mapped to some of
 * the virtual cpus, though shunting to the real host will still be faster
 * if the virtual kernel processes are running on fewer real-host cpus.
 * (It will probably be faster anyway since there's no round-trip
 * signaling overhead.)
 *
 * NOTE: The critical section protects against preemption while the pmap
 *       is locked, which could otherwise result in a deadlock.
 */
static __inline
void
guest_sync_addr(struct pmap *pmap,
                volatile vpte_t *dst_ptep, volatile vpte_t *src_ptep)
{
        globaldata_t gd = mycpu;
        cpulock_t olock;
        cpulock_t nlock;

        /*
         * Lock the pmap
         */
        crit_enter();
        for (;;) {
                olock = pmap->pm_active_lock;
                cpu_ccfence();
                if ((olock & CPULOCK_EXCL) == 0) {
                        nlock = olock | CPULOCK_EXCL;
                        if (atomic_cmpset_int(&pmap->pm_active_lock,
                                              olock, nlock)) {
                                break;
                        }
                }
                cpu_pause();
                lwkt_process_ipiq();
                pthread_yield();
        }

        /*
         * Update the pte and synchronize with other cpus.  If we can update
         * it trivially, do so.
         */
        if (CPUMASK_TESTZERO(pmap->pm_active) ||
            CPUMASK_CMPMASKEQ(pmap->pm_active, gd->gd_cpumask)) {
                if (dst_ptep && src_ptep)
                        *dst_ptep = *src_ptep;
                vmm_cpu_invltlb();
        } else {
                vmm_guest_sync_addr(__DEVOLATILE(void *, dst_ptep),
                                    __DEVOLATILE(void *, src_ptep));
        }

        /*
         * Unlock the pmap
         */
        atomic_clear_int(&pmap->pm_active_lock, CPULOCK_EXCL);
        crit_exit();
}
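/*
 * Illustrative usage sketch only (kept under #if 0, never compiled):
 * how the helpers below hand a pte update to guest_sync_addr().  In the
 * degenerate single-cpu case the update is applied locally; otherwise
 * the host copies *src into *dst while synchronizing every real cpu the
 * vkernel occupies.  Hypothetical caller, shown purely for clarity.
 */
#if 0
        vpte_t newpte = 0;                      /* value to store in *ptep */

        guest_sync_addr(pmap, ptep, &newpte);   /* *ptep = newpte + TLB sync */
#endif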
/*
 * Invalidate a pte in a pmap and synchronize with target cpus
 * as required.  Throw away the modified and access bits.  Use
 * pmap_clean_pte() to do the same thing but also get an interlocked
 * modified/access status.
 *
 * Clearing the field first (basically clearing VPTE_V) prevents any
 * new races from occurring while we invalidate the TLB (i.e. the pmap
 * on the real cpu), then clear it again to clean out any race that
 * might have occurred before the invalidation completed.
 */
void
pmap_inval_pte(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
        vpte_t pte;

        if (vmm_enabled == 0) {
                atomic_swap_long(ptep, 0);
                pmap_inval_cpu(pmap, va, PAGE_SIZE);
        } else {
                pte = 0;
                guest_sync_addr(pmap, ptep, &pte);
        }
}

/*
 * Invalidate the tlb for a range of virtual addresses across all cpus
 * belonging to the pmap.
 */
void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
        if (vmm_enabled == 0) {
                pmap_inval_cpu(pmap, sva, eva - sva);
        } else {
                guest_sync_addr(pmap, NULL, NULL);
        }
}

/*
 * Same as pmap_inval_pte() but only synchronize with the current
 * cpu.  For the moment it's the same as the non-quick version.
 */
void
pmap_inval_pte_quick(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
        atomic_swap_long(ptep, 0);
        if (vmm_enabled)
                vmm_cpu_invltlb();
        else
                pmap_inval_cpu(pmap, va, PAGE_SIZE);
}

/*
 * Invalidating page directory entries requires some additional
 * sophistication.  The cachemask must be cleared so the kernel
 * resynchronizes its temporary page table mapping cache.
 */
void
pmap_inval_pde(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
        vpte_t pte;

        if (vmm_enabled == 0) {
                *ptep = 0;
                pmap_inval_cpu(pmap, va, SEG_SIZE);
        } else if (CPUMASK_TESTMASK(pmap->pm_active,
                                    mycpu->gd_other_cpus) == 0) {
                *ptep = 0;
                vmm_cpu_invltlb();
        } else {
                pte = 0;
                guest_sync_addr(pmap, ptep, &pte);
        }
}

void
pmap_inval_pde_quick(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
        pmap_inval_pde(ptep, pmap, va);
}

/*
 * These carefully handle interactions with other cpus and return
 * the original vpte.  Clearing VPTE_RW prevents us from racing the
 * setting of VPTE_M, allowing us to invalidate the tlb (the real cpu's
 * pmap) and get good status for VPTE_M.
 *
 * When messing with page directory entries we have to clear the cpu
 * mask to force a reload of the kernel's page table mapping cache.
 *
 * clean:       clear VPTE_M and VPTE_RW
 * setro:       clear VPTE_RW
 * load&clear:  clear the entire field
 */
#include <stdio.h>

vpte_t
pmap_clean_pte(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
        vpte_t pte;

        pte = *ptep;
        if (pte & VPTE_V) {
                atomic_clear_long(ptep, VPTE_RW);
                if (vmm_enabled == 0) {
                        pmap_inval_cpu(pmap, va, PAGE_SIZE);
                        pte = *ptep;
                } else {
                        guest_sync_addr(pmap, &pte, ptep);
                }
                atomic_clear_long(ptep, VPTE_RW|VPTE_M);
        }
        return(pte);
}

vpte_t
pmap_clean_pde(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
        vpte_t pte;

        pte = *ptep;
        if (pte & VPTE_V) {
                atomic_clear_long(ptep, VPTE_RW);
                if (vmm_enabled == 0) {
                        pmap_inval_cpu(pmap, va, SEG_SIZE);
                        pte = *ptep;
                } else {
                        guest_sync_addr(pmap, &pte, ptep);
                }
                atomic_clear_long(ptep, VPTE_RW|VPTE_M);
        }
        return(pte);
}
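/*
 * Illustrative usage sketch only (kept under #if 0, never compiled):
 * how a caller might consume the vpte returned by pmap_clean_pte().
 * The vm_page_dirty() call is a hypothetical example of dirty-state
 * accounting and is not taken from this file.
 */
#if 0
        vpte_t opte;

        opte = pmap_clean_pte(ptep, pmap, va);
        if (opte & VPTE_M)
                vm_page_dirty(m);       /* page was modified before cleaning */
#endif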
/*
 * This is an odd case and I'm not sure whether it even occurs in normal
 * operation.  Turn off write access to the page, clean out the tlb
 * (the real cpu's pmap), and deal with any VPTE_M race that may have
 * occurred.  VPTE_M is not cleared.
 */
vpte_t
pmap_setro_pte(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
        vpte_t pte;
        vpte_t npte;

        pte = *ptep;
        if (pte & VPTE_V) {
                atomic_clear_long(ptep, VPTE_RW);
                if (vmm_enabled == 0) {
                        pmap_inval_cpu(pmap, va, PAGE_SIZE);
                        pte |= *ptep & VPTE_M;
                } else {
                        guest_sync_addr(pmap, &npte, ptep);
                        pte |= npte & VPTE_M;
                }
        }
        return(pte);
}

/*
 * This is a combination of pmap_inval_pte() and pmap_clean_pte().
 * First prevent races with the 'A' and 'M' bits, then clean out
 * the tlb (the real cpu's pmap), then incorporate any races that
 * may have occurred in the mean time, and finally zero out the pte.
 */
vpte_t
pmap_inval_loadandclear(volatile vpte_t *ptep, struct pmap *pmap,
                        vm_offset_t va)
{
        vpte_t pte;
        vpte_t npte;

        pte = *ptep;
        if (pte & VPTE_V) {
                pte = *ptep;
                atomic_clear_long(ptep, VPTE_RW);
                if (vmm_enabled == 0) {
                        pmap_inval_cpu(pmap, va, PAGE_SIZE);
                        pte = (pte & VPTE_RW) | *ptep;
                } else {
                        guest_sync_addr(pmap, &npte, ptep);
                        pte = (pte & VPTE_RW) | npte;
                }
        }
        atomic_swap_long(ptep, 0);

        return(pte);
}

void
cpu_invlpg(void *addr)
{
        if (vmm_enabled)
                vmm_cpu_invltlb();      /* For VMM mode forces vmmexit/resume */
        else
                madvise(addr, PAGE_SIZE, MADV_INVAL);
}

void
cpu_invltlb(void)
{
        if (vmm_enabled)
                vmm_cpu_invltlb();      /* For VMM mode forces vmmexit/resume */
        else
                madvise((void *)KvaStart, KvaEnd - KvaStart, MADV_INVAL);
}

void
smp_invltlb(void)
{
        /* XXX must invalidate the tlb on all cpus */
        /* at the moment pmap_inval_pte_quick */
        /* do nothing */
}

void
smp_sniff(void)
{
        /* not implemented */
}
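/*
 * Illustrative usage sketch only (kept under #if 0, never compiled):
 * a hypothetical mapping-removal path using pmap_inval_loadandclear().
 * The returned vpte carries the interlocked VPTE_M/VPTE_A state, which
 * a caller would typically fold back into the vm_page.  All names other
 * than pmap_inval_loadandclear() are illustrative assumptions.
 */
#if 0
        vpte_t opte;

        opte = pmap_inval_loadandclear(ptep, pmap, va);
        if (opte & VPTE_M)
                vm_page_dirty(m);               /* modified before removal */
        if (opte & VPTE_A)
                vm_page_flag_set(m, PG_REFERENCED); /* accessed before removal */
#endif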