/*
 * Copyright (c) 2003-2016 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * pmap invalidation support code.  Certain hardware requirements must
 * be dealt with when manipulating page table entries and page directory
 * entries within a pmap.  In particular, we cannot safely manipulate
 * page tables which are in active use by another cpu (even if it is
 * running in userland) for two reasons: First, TLB writebacks will
 * race against our own modifications and tests.  Second, even if we
 * were to use bus-locked instructions we can still screw up the
 * target cpu's instruction pipeline due to Intel cpu errata.
 *
 * For our virtual page tables, the real kernel will handle SMP interactions
 * with pmaps that may be active on other cpus.  Even so, we have to be
 * careful about bit setting races, particularly when we are trying to clean
 * a page and test the modified bit, to avoid races where the modified bit
 * might get set after our poll but before we clear the field.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/thread2.h>
#include <sys/cdefs.h>
#include <sys/mman.h>
#include <sys/vmspace.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_object.h>

#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <machine/smp.h>
#include <machine/globaldata.h>
#include <machine/pmap.h>
#include <machine/pmap_inval.h>

#include <unistd.h>

#include <vm/vm_page2.h>

/*
 * Invalidate va in the TLB on the current cpu
 */
static __inline
void
pmap_inval_cpu(struct pmap *pmap, vm_offset_t va, size_t bytes)
{
	if (pmap == kernel_pmap) {
		madvise((void *)va, bytes, MADV_INVAL);
	} else {
		vmspace_mcontrol(pmap, (void *)va, bytes, MADV_INVAL, 0);
	}
}

/*
 * Invalidate a pte in a pmap and synchronize with target cpus
 * as required.  Throw away the modified and access bits.  Use
 * pmap_clean_pte() to do the same thing but also get an interlocked
 * modified/access status.
 *
 * Clearing the field first (basically clearing VPTE_V) prevents any
 * new races from occurring while we invalidate the TLB (i.e. the pmap
 * on the real cpu), then clear it again to clean out any race that
 * might have occurred before the invalidation completed.
 */
void
pmap_inval_pte(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
	atomic_swap_long(ptep, 0);
	pmap_inval_cpu(pmap, va, PAGE_SIZE);
}

/*
 * Same as pmap_inval_pte() but only synchronize with the current
 * cpu.  For the moment it's the same as the non-quick version.
 */
void
pmap_inval_pte_quick(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
	atomic_swap_long(ptep, 0);
	pmap_inval_cpu(pmap, va, PAGE_SIZE);
}

/*
 * Invalidate the tlb for a range of virtual addresses across all cpus
 * belonging to the pmap.
 */
void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	pmap_inval_cpu(pmap, sva, eva - sva);
}

/*
 * Invalidating page directory entries requires some additional
 * sophistication.  The cachemask must be cleared so the kernel
 * resynchronizes its temporary page table mappings cache.
 */
void
pmap_inval_pde(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
	atomic_swap_long(ptep, 0);
	pmap_inval_cpu(pmap, va, SEG_SIZE);
}

void
pmap_inval_pde_quick(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
	pmap_inval_pde(ptep, pmap, va);
}

/*
 * This is really nasty.
 *
 * (1) The vkernel interlocks pte operations with the related vm_page_t
 *     spin-lock (and doesn't handle unmanaged page races).
 *
 * (2) The vkernel must also issue an invalidation to the real cpu.  It
 *     (nastily) does this while holding the spin-lock too.
 *
 * In addition, atomic ops must be used to properly interlock against
 * other cpus and the real kernel (which could be taking a fault on another
 * cpu and will adjust VPTE_M and VPTE_A appropriately).
 *
 * The atomic ops do a good job of interlocking against other cpus, but
 * we still need to lock the pte location (for which we use the vm_page
 * spin-lock) to avoid races against PG_WRITEABLE and other tests.
 *
 * Cleaning the pte involves clearing VPTE_M and VPTE_RW, synchronizing with
 * the real host, and updating the vm_page appropriately.
 *
 * If the caller passes a non-NULL (m), the caller holds the spin-lock,
 * otherwise we must acquire and release the spin-lock.  (m) is only
 * applicable to managed pages.
 */
vpte_t
pmap_clean_pte(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va,
	       vm_page_t m)
{
	vpte_t pte;
	int spin = 0;

	/*
	 * Acquire (m) and spin-lock it.
	 */
	while (m == NULL) {
		pte = *ptep;
		if ((pte & VPTE_V) == 0)
			return pte;
		if ((pte & VPTE_MANAGED) == 0)
			break;
		m = PHYS_TO_VM_PAGE(pte & VPTE_FRAME);
		vm_page_spin_lock(m);

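		/*
		 * Re-check the pte now that the page is spin-locked.  The
		 * mapping may have been invalidated, become unmanaged, or
		 * been replaced with a different page while we blocked on
		 * the lock; if so, drop the lock and retry from scratch.
		 */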
		pte = *ptep;
		if ((pte & VPTE_V) == 0) {
			vm_page_spin_unlock(m);
			m = NULL;
			continue;
		}
		if ((pte & VPTE_MANAGED) == 0) {
			vm_page_spin_unlock(m);
			m = NULL;
			continue;
		}
		if (m != PHYS_TO_VM_PAGE(pte & VPTE_FRAME)) {
			vm_page_spin_unlock(m);
			m = NULL;
			continue;
		}
		spin = 1;
		break;
	}

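	/*
	 * Atomically clear VPTE_RW and VPTE_M.  If the pte was writable we
	 * must also invalidate the real host's TLB for this page so that
	 * no stale writable mapping survives the downgrade.
	 */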
	for (;;) {
		pte = *ptep;
		cpu_ccfence();
		if ((pte & VPTE_RW) == 0)
			break;
		if (atomic_cmpset_long(ptep, pte, pte & ~(VPTE_RW | VPTE_M))) {
			pmap_inval_cpu(pmap, va, PAGE_SIZE);
			break;
		}
	}

	if (m) {
		if (pte & VPTE_A) {
			vm_page_flag_set(m, PG_REFERENCED);
			atomic_clear_long(ptep, VPTE_A);
		}
		if (pte & VPTE_M) {
			vm_page_dirty(m);
		}
		if (spin)
			vm_page_spin_unlock(m);
	}
	return pte;
}
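
/*
 * Illustrative sketch only (not part of the original source): a
 * hypothetical caller that downgrades a mapping to read-only via
 * pmap_clean_pte().  The helper name example_wrprotect_pte() is made up
 * for illustration; real callers live elsewhere in the vkernel pmap code.
 */
#if 0
static int
example_wrprotect_pte(volatile vpte_t *ptep, struct pmap *pmap,
		      vm_offset_t va)
{
	vpte_t opte;

	/*
	 * Passing m == NULL asks pmap_clean_pte() to look up and
	 * spin-lock the vm_page itself (managed mappings only).  On
	 * return VPTE_RW and VPTE_M are clear in the pte and any
	 * dirty/accessed state has been folded into the vm_page.
	 */
	opte = pmap_clean_pte(ptep, pmap, va, NULL);

	/* Report whether the mapping had been written to. */
	return ((opte & (VPTE_V | VPTE_M)) == (VPTE_V | VPTE_M));
}
#endif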
/*
 * This is a combination of pmap_inval_pte() and pmap_clean_pte().
 * First prevent races with the 'A' and 'M' bits, then clean out
 * the tlb (the real cpu's pmap), then incorporate any races that
 * may have occurred in the mean time, and finally zero out the pte.
 */
vpte_t
pmap_inval_loadandclear(volatile vpte_t *ptep, struct pmap *pmap,
			vm_offset_t va)
{
	vpte_t pte;

	pte = atomic_swap_long(ptep, 0);
	pmap_inval_cpu(pmap, va, PAGE_SIZE);
	return(pte);
}
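
/*
 * Illustrative sketch only (not part of the original source): a
 * hypothetical removal path built on pmap_inval_loadandclear(), showing
 * how the returned pte can be used to propagate VPTE_A and VPTE_M into
 * the backing vm_page once the mapping has been destroyed and the real
 * host's TLB invalidated.  The helper name is made up and vm_page
 * locking is elided for brevity.
 */
#if 0
static void
example_remove_pte(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
	vpte_t opte;
	vm_page_t m;

	opte = pmap_inval_loadandclear(ptep, pmap, va);
	if ((opte & (VPTE_V | VPTE_MANAGED)) != (VPTE_V | VPTE_MANAGED))
		return;

	/* Fold the accessed/modified state into the backing page. */
	m = PHYS_TO_VM_PAGE(opte & VPTE_FRAME);
	if (opte & VPTE_A)
		vm_page_flag_set(m, PG_REFERENCED);
	if (opte & VPTE_M)
		vm_page_dirty(m);
}
#endif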
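/*
 * Invalidate a single page in the current cpu's TLB.  In the vkernel
 * this is done by asking the real host kernel, via madvise(MADV_INVAL),
 * to invalidate its mapping of the page.
 */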
void
cpu_invlpg(void *addr)
{
	madvise(addr, PAGE_SIZE, MADV_INVAL);
}

void
cpu_invltlb(void)
{
	madvise((void *)KvaStart, KvaEnd - KvaStart, MADV_INVAL);
}

/*
 * Invalidate the TLB on all cpus.  The vkernel does nothing here;
 * instead it ignores VM_PROT_NOSYNC on pmap_enter() calls.
 */
void
smp_invltlb(void)
{
	/* do nothing */
}

void
smp_sniff(void)
{
	/* not implemented */
}

void
cpu_sniff(int dcpu __unused)
{
	/* not implemented */
}