/*
 * Copyright (c) 2003-2016 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * pmap invalidation support code.  Certain hardware requirements must
 * be dealt with when manipulating page table entries and page directory
 * entries within a pmap.  In particular, we cannot safely manipulate
 * page tables which are in active use by another cpu (even if it is
 * running in userland) for two reasons: First, TLB writebacks will
 * race against our own modifications and tests.  Second, even if we
 * were to use bus-locked instructions, we can still screw up the
 * target cpu's instruction pipeline due to Intel cpu errata.
 *
 * For our virtual page tables, the real kernel will handle SMP interactions
 * with pmaps that may be active on other cpus.  Even so, we have to be
 * careful about bit-setting races, particularly when we are trying to clean
 * a page and test the modified bit, to avoid races where the modified bit
 * might get set after our poll but before we clear the field.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/thread2.h>
#include <sys/cdefs.h>
#include <sys/mman.h>
#include <sys/vmspace.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_object.h>

#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <machine/smp.h>
#include <machine/globaldata.h>
#include <machine/pmap.h>
#include <machine/pmap_inval.h>

#include <unistd.h>

#include <vm/vm_page2.h>

/*
 * Invalidate va in the TLB on the current cpu
 */
static __inline
void
pmap_inval_cpu(struct pmap *pmap, vm_offset_t va, size_t bytes)
{
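	/*
	 * The kernel_pmap range lies in the vkernel's own address space,
	 * so a plain madvise(MADV_INVAL) is sufficient.  Other pmaps are
	 * backed by a vmspace on the real host and are invalidated
	 * through vmspace_mcontrol().
	 */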
	if (pmap == kernel_pmap) {
		madvise((void *)va, bytes, MADV_INVAL);
	} else {
		vmspace_mcontrol(pmap, (void *)va, bytes, MADV_INVAL, 0);
	}
}

/*
 * Invalidate a pte in a pmap and synchronize with target cpus
 * as required.  Throw away the modified and access bits.  Use
 * pmap_clean_pte() to do the same thing but also get an interlocked
 * modified/access status.
 *
 * Clearing the field first (basically clearing VPTE_V) prevents any
 * new races from occurring while we invalidate the TLB (i.e. the pmap
 * on the real cpu), then clear it again to clean out any race that
 * might have occurred before the invalidation completed.
 */
void
pmap_inval_pte(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
	atomic_swap_long(ptep, 0);
	pmap_inval_cpu(pmap, va, PAGE_SIZE);
}

/*
 * Same as pmap_inval_pte() but only synchronize with the current
 * cpu.  For the moment it is the same as the non-quick version.
 */
void
pmap_inval_pte_quick(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
	atomic_swap_long(ptep, 0);
	pmap_inval_cpu(pmap, va, PAGE_SIZE);
}

/*
 * Invalidate the tlb for a range of virtual addresses across all cpus
 * belonging to the pmap.
 */
void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	pmap_inval_cpu(pmap, sva, eva - sva);
}

/*
 * Invalidating page directory entries requires some additional
 * sophistication.  The cachemask must be cleared so the kernel
 * resynchronizes its temporary page table mappings cache.
 */
void
pmap_inval_pde(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
	atomic_swap_long(ptep, 0);
	pmap_inval_cpu(pmap, va, SEG_SIZE);
}

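/*
 * Same as pmap_inval_pde() but only intended to synchronize with the
 * current cpu.  For the moment it simply calls the non-quick version.
 */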
void
pmap_inval_pde_quick(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
	pmap_inval_pde(ptep, pmap, va);
}

/*
 * This is really nasty.
 *
 * (1) The vkernel interlocks pte operations with the related vm_page_t
 *     spin-lock (and doesn't handle unmanaged page races).
 *
 * (2) The vkernel must also issue an invalidation to the real cpu.  It
 *     (nastily) does this while holding the spin-lock too.
 *
 * In addition, atomic ops must be used to properly interlock against
 * other cpus and the real kernel (which could be taking a fault on another
 * cpu and will adjust VPTE_M and VPTE_A appropriately).
 *
 * The atomic ops do a good job of interlocking against other cpus, but
 * we still need to lock the pte location (for which we use the vm_page
 * spin-lock) to avoid races against PG_WRITEABLE and other tests.
 *
 * Cleaning the pte involves clearing VPTE_M and VPTE_RW, synchronizing with
 * the real host, and updating the vm_page appropriately.
 *
 * If the caller passes a non-NULL (m), the caller holds the spin-lock,
 * otherwise we must acquire and release the spin-lock.  (m) is only
 * applicable to managed pages.
 */
vpte_t
pmap_clean_pte(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va,
	       vm_page_t m)
{
	vpte_t pte;
	int spin = 0;

	/*
	 * Acquire (m) and spin-lock it.
	 */
	while (m == NULL) {
		pte = *ptep;
		if ((pte & VPTE_V) == 0)
			return pte;
		if ((pte & VPTE_MANAGED) == 0)
			break;
		m = PHYS_TO_VM_PAGE(pte & VPTE_FRAME);
		vm_page_spin_lock(m);

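		/*
		 * Re-check the pte now that the spin-lock is held.  If it
		 * was invalidated, became unmanaged, or now points at a
		 * different page, drop the lock and retry.
		 */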
		pte = *ptep;
		if ((pte & VPTE_V) == 0) {
			vm_page_spin_unlock(m);
			m = NULL;
			continue;
		}
		if ((pte & VPTE_MANAGED) == 0) {
			vm_page_spin_unlock(m);
			m = NULL;
			continue;
		}
		if (m != PHYS_TO_VM_PAGE(pte & VPTE_FRAME)) {
			vm_page_spin_unlock(m);
			m = NULL;
			continue;
		}
		spin = 1;
		break;
	}

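	/*
	 * If the pte is writable, atomically clear VPTE_RW and VPTE_M and
	 * invalidate the real host's TLB for the page.  The cmpset loop
	 * retries if the pte changes underneath us.
	 */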
	for (;;) {
		pte = *ptep;
		cpu_ccfence();
		if ((pte & VPTE_RW) == 0)
			break;
		if (atomic_cmpset_long(ptep, pte, pte & ~(VPTE_RW | VPTE_M))) {
			pmap_inval_cpu(pmap, va, PAGE_SIZE);
			break;
		}
	}

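	/*
	 * Transfer the interlocked accessed/modified state to the vm_page
	 * and release the spin-lock if we acquired it above.
	 */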
	if (m) {
		if (pte & VPTE_A) {
			vm_page_flag_set(m, PG_REFERENCED);
			atomic_clear_long(ptep, VPTE_A);
		}
		if (pte & VPTE_M) {
			vm_page_dirty(m);
		}
		if (spin)
			vm_page_spin_unlock(m);
	}
	return pte;
}

/*
 * This is a combination of pmap_inval_pte() and pmap_clean_pte().
 * First prevent races with the 'A' and 'M' bits, then clean out
 * the tlb (the real cpu's pmap), then incorporate any races that
 * may have occurred in the meantime, and finally zero out the pte.
 */
vpte_t
pmap_inval_loadandclear(volatile vpte_t *ptep, struct pmap *pmap,
			vm_offset_t va)
{
	vpte_t pte;

	pte = atomic_swap_long(ptep, 0);
	pmap_inval_cpu(pmap, va, PAGE_SIZE);
	return(pte);
}

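/*
 * Invalidate a single page in the current cpu's TLB by asking the real
 * host to invalidate its mapping.
 */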
void
cpu_invlpg(void *addr)
{
	madvise(addr, PAGE_SIZE, MADV_INVAL);
}

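/*
 * Invalidate the current cpu's TLB by invalidating the entire kernel
 * virtual address range (KvaStart through KvaEnd).
 */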
void
cpu_invltlb(void)
{
	madvise((void *)KvaStart, KvaEnd - KvaStart, MADV_INVAL);
}

/*
 * Invalidate the TLB on all cpus.  This is a no-op in the vkernel;
 * instead, the vkernel ignores VM_PROT_NOSYNC on pmap_enter() calls.
 */
void
smp_invltlb(void)
{
	/* do nothing */
}

void
smp_sniff(void)
{
	/* not implemented */
}

void
cpu_sniff(int dcpu __unused)
{
	/* not implemented */
}