/*
 * Copyright (c) 2003-2016 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * pmap invalidation support code.  Certain hardware requirements must
 * be dealt with when manipulating page table entries and page directory
 * entries within a pmap.  In particular, we cannot safely manipulate
 * page tables which are in active use by another cpu (even if it is
 * running in userland) for two reasons: First, TLB writebacks will
 * race against our own modifications and tests.  Second, even if we
 * were to use bus-locked instructions we could still screw up the
 * target cpu's instruction pipeline due to Intel cpu errata.
 *
 * For our virtual page tables, the real kernel will handle SMP interactions
 * with pmaps that may be active on other cpus.  Even so, we have to be
 * careful about bit-setting races, particularly when we are trying to clean
 * a page and test the modified bit in order to avoid races where the
 * modified bit might get set after our poll but before we clear the field.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/thread2.h>
#include <sys/cdefs.h>
#include <sys/mman.h>
#include <sys/vmspace.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_object.h>

#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <machine/smp.h>
#include <machine/globaldata.h>
#include <machine/pmap.h>
#include <machine/pmap_inval.h>

#include <unistd.h>

#include <vm/vm_page2.h>

/*
 * Invalidate va in the TLB on the current cpu
 */
static __inline
void
pmap_inval_cpu(struct pmap *pmap, vm_offset_t va, size_t bytes)
{
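	/*
	 * The kernel pmap maps the vkernel's own address range, so a plain
	 * madvise() on ourselves is sufficient.  User pmaps are backed by
	 * separate vmspaces on the real host and must be invalidated via
	 * vmspace_mcontrol().
	 */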
	if (pmap == kernel_pmap) {
		madvise((void *)va, bytes, MADV_INVAL);
	} else {
		vmspace_mcontrol(pmap, (void *)va, bytes, MADV_INVAL, 0);
	}
}

/*
 * Invalidate a pte in a pmap and synchronize with target cpus
 * as required.  Throw away the modified and access bits.  Use
 * pmap_clean_pte() to do the same thing but also get an interlocked
 * modified/access status.
 *
 * Clearing the field first (basically clearing VPTE_V) prevents any
 * new races from occurring while we invalidate the TLB (i.e. the pmap
 * on the real cpu), then clear it again to clean out any race that
 * might have occurred before the invalidation completed.
 */
void
pmap_inval_pte(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
	atomic_swap_long(ptep, 0);
	pmap_inval_cpu(pmap, va, PAGE_SIZE);
}

/*
 * Same as pmap_inval_pte() but only synchronize with the current
 * cpu.  For the moment it's the same as the non-quick version.
 */
void
pmap_inval_pte_quick(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
	atomic_swap_long(ptep, 0);
	pmap_inval_cpu(pmap, va, PAGE_SIZE);
}

/*
 * Invalidate the tlb for a range of virtual addresses across all cpus
 * belonging to the pmap.
 */
void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	pmap_inval_cpu(pmap, sva, eva - sva);
}

/*
 * Invalidating page directory entries requires some additional
 * sophistication.  The cachemask must be cleared so the kernel
 * resynchronizes its temporary page table mappings cache.
 */
void
pmap_inval_pde(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
	atomic_swap_long(ptep, 0);
	pmap_inval_cpu(pmap, va, SEG_SIZE);
}

void
pmap_inval_pde_quick(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
	pmap_inval_pde(ptep, pmap, va);
}

/*
 * This is really nasty.
 *
 * (1) The vkernel interlocks pte operations with the related vm_page_t
 *     spin-lock (and doesn't handle unmanaged page races).
 *
 * (2) The vkernel must also issue an invalidation to the real cpu.  It
 *     (nastily) does this while holding the spin-lock too.
 *
 * In addition, atomic ops must be used to properly interlock against
 * other cpus and the real kernel (which could be taking a fault on another
 * cpu and will adjust VPTE_M and VPTE_A appropriately).
 *
 * The atomic ops do a good job of interlocking against other cpus, but
 * we still need to lock the pte location (for which we use the vm_page
 * spin-lock) to avoid races against PG_WRITEABLE and other tests.
 *
 * Cleaning the pte involves clearing VPTE_M and VPTE_RW, synchronizing with
 * the real host, and updating the vm_page appropriately.
 *
 * If the caller passes a non-NULL (m), the caller holds the spin-lock,
 * otherwise we must acquire and release the spin-lock.  (m) is only
 * applicable to managed pages.
 */
vpte_t
pmap_clean_pte(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va,
	       vm_page_t m)
{
	vpte_t pte;
	int spin = 0;

	/*
	 * Acquire (m) and spin-lock it.
	 */
	while (m == NULL) {
		pte = *ptep;
		if ((pte & VPTE_V) == 0)
			return pte;
		if ((pte & VPTE_MANAGED) == 0)
			break;
		m = PHYS_TO_VM_PAGE(pte & VPTE_FRAME);
		vm_page_spin_lock(m);

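		/*
		 * Re-check the pte now that the spin-lock is held.  It may
		 * have been invalidated, become unmanaged, or been remapped
		 * to a different page while we waited for the lock.
		 */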
		pte = *ptep;
		if ((pte & VPTE_V) == 0) {
			vm_page_spin_unlock(m);
			m = NULL;
			continue;
		}
		if ((pte & VPTE_MANAGED) == 0) {
			vm_page_spin_unlock(m);
			m = NULL;
			continue;
		}
		if (m != PHYS_TO_VM_PAGE(pte & VPTE_FRAME)) {
			vm_page_spin_unlock(m);
			m = NULL;
			continue;
		}
		spin = 1;
		break;
	}

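	/*
	 * Atomically clear VPTE_RW and VPTE_M, then invalidate the real
	 * host's tlb.  Retry if the pte changes underneath us; if the pte
	 * was not writable to begin with there is nothing to clean.
	 */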
	for (;;) {
		pte = *ptep;
		cpu_ccfence();
		if ((pte & VPTE_RW) == 0)
			break;
		if (atomic_cmpset_long(ptep, pte, pte & ~(VPTE_RW | VPTE_M))) {
			pmap_inval_cpu(pmap, va, PAGE_SIZE);
			break;
		}
	}

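	/*
	 * Propagate the accessed/modified state we observed into the
	 * vm_page before dropping the spin-lock.
	 */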
	if (m) {
		if (pte & VPTE_A) {
			vm_page_flag_set(m, PG_REFERENCED);
			atomic_clear_long(ptep, VPTE_A);
		}
		if (pte & VPTE_M) {
			vm_page_dirty(m);
		}
		if (spin)
			vm_page_spin_unlock(m);
	}
	return pte;
}

/*
 * This is a combination of pmap_inval_pte() and pmap_clean_pte().
 * First prevent races with the 'A' and 'M' bits, then clean out
 * the tlb (the real cpu's pmap), then incorporate any races that
 * may have occurred in the meantime, and finally zero out the pte.
 */
vpte_t
pmap_inval_loadandclear(volatile vpte_t *ptep, struct pmap *pmap,
			vm_offset_t va)
{
	vpte_t pte;

	pte = atomic_swap_long(ptep, 0);
	pmap_inval_cpu(pmap, va, PAGE_SIZE);
	return(pte);
}

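/*
 * Invalidate a single page of the vkernel's address space in the
 * real host's tlb on the current cpu.
 */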
void
cpu_invlpg(void *addr)
{
	madvise(addr, PAGE_SIZE, MADV_INVAL);
}

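/*
 * Invalidate the vkernel's entire KVA range (KvaStart through KvaEnd)
 * in the real host's tlb.
 */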
void
cpu_invltlb(void)
{
	madvise((void *)KvaStart, KvaEnd - KvaStart, MADV_INVAL);
}

/*
 * Invalidate the TLB on all cpus.  The vkernel does not need to do
 * anything here; instead it ignores VM_PROT_NOSYNC on pmap_enter()
 * calls.
 */
void
smp_invltlb(void)
{
	/* do nothing */
}

void
smp_sniff(void)
{
	/* not implemented */
}

void
cpu_sniff(int dcpu __unused)
{
	/* not implemented */
}