/*
 * Copyright (c) 2003-2011 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * pmap invalidation support code.  Certain hardware requirements must
 * be dealt with when manipulating page table entries and page directory
 * entries within a pmap.  In particular, we cannot safely manipulate
 * page tables which are in active use by another cpu (even if it is
 * running in userland) for two reasons: First, TLB writebacks will
 * race against our own modifications and tests.  Second, even if we
 * were to use bus-locked instructions we could still screw up the
 * target cpu's instruction pipeline due to Intel cpu errata.
 */
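
/*
 * Invalidations are coordinated through per-cpu pmap_inval_info command
 * blocks (invinfo[], below).  An originating cpu publishes its command in
 * its own slot, initializes the 'mask' and 'done' cpu bitmasks, and uses
 * smp_invlpg() to direct Xinvltlb IPIs at the target cpus.  The targets
 * and the originator then run the state machine in pmap_inval_intr()
 * until all 'done' bits have been cleared.
 */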

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/thread2.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_object.h>

#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <machine/smp.h>
#include <machine/globaldata.h>
#include <machine/pmap.h>
#include <machine/pmap_inval.h>
#include <machine/clock.h>

#if 1	/* DEBUGGING */
#define LOOPRECOVER			/* enable watchdog */
#endif

/*
 * Watchdog recovery interval, in seconds.
 *
 * The watchdog value is generous for two reasons.  First, because the
 * situation is not supposed to happen at all (but does), and second,
 * because VMs could be very slow at handling IPIs.
 */
#define LOOPRECOVER_TIMEOUT1	2	/* initial recovery */
#define LOOPRECOVER_TIMEOUT2	1	/* repeated recoveries */

#define MAX_INVAL_PAGES		128

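/*
 * Per-cpu invalidation command block.  Each cpu owns the invinfo[] slot
 * indexed by its cpuid and fills it in when it originates an invalidation:
 *
 *	va, npgs	- virtual address range to invalidate; va of
 *			  (vm_offset_t)-1 means a full TLB invalidation
 *	ptep, npte	- optional pte replacement; opte receives the old
 *			  pte (INVSTORE) or holds the compare value
 *			  (INVCMPSET)
 *	mode		- INVSTORE (swap) or INVCMPSET (compare-and-set)
 *	mask, done	- handshake bitmasks driven by pmap_inval_intr()
 */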
struct pmap_inval_info {
	vm_offset_t	va;
	pt_entry_t	*ptep;
	pt_entry_t	opte;
	pt_entry_t	npte;
	enum { INVDONE, INVSTORE, INVCMPSET } mode;
	int		success;
	vm_pindex_t	npgs;
	cpumask_t	done;
	cpumask_t	mask;
#ifdef LOOPRECOVER
	cpumask_t	sigmask;
	int		failed;
	tsc_uclock_t	tsc_target;
#endif
} __cachealign;

typedef struct pmap_inval_info pmap_inval_info_t;

static pmap_inval_info_t	invinfo[MAXCPU];
extern cpumask_t		smp_invmask;
#ifdef LOOPRECOVER
#ifdef LOOPMASK_IN
extern cpumask_t		smp_in_mask;
#endif
extern cpumask_t		smp_smurf_mask;
#endif
static int pmap_inval_watchdog_print;	/* must always default off */
static int pmap_inval_force_allcpus;
static int pmap_inval_force_nonopt;

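/*
 * Debug knobs: pmap_inval_watchdog_print enables the watchdog kprintf()s
 * in loopdebug(), pmap_inval_force_allcpus forces invalidation IPIs to
 * target all active cpus rather than just the pmap's active set, and
 * pmap_inval_force_nonopt disables the single-cpu shortcut in the
 * pmap_inval_smp*() functions.
 */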
SYSCTL_INT(_machdep, OID_AUTO, pmap_inval_watchdog_print, CTLFLAG_RW,
	    &pmap_inval_watchdog_print, 0, "");
SYSCTL_INT(_machdep, OID_AUTO, pmap_inval_force_allcpus, CTLFLAG_RW,
	    &pmap_inval_force_allcpus, 0, "");
SYSCTL_INT(_machdep, OID_AUTO, pmap_inval_force_nonopt, CTLFLAG_RW,
	    &pmap_inval_force_nonopt, 0, "");

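/*
 * pmap_inval_init() and pmap_inval_done() bracket an invalidation
 * sequence.  For user pmaps they acquire and release the CPULOCK_EXCL
 * interlock on pm_active_lock and bump pm_invgen on both entry and exit,
 * giving interested code a generation count that changes across the
 * operation.  The kernel_pmap needs only the critical section.
 */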
static void
pmap_inval_init(pmap_t pmap)
{
	cpulock_t olock;
	cpulock_t nlock;

	crit_enter_id("inval");

	if (pmap != kernel_pmap) {
		for (;;) {
			olock = pmap->pm_active_lock;
			cpu_ccfence();
			nlock = olock | CPULOCK_EXCL;
			if (olock != nlock &&
			    atomic_cmpset_int(&pmap->pm_active_lock,
					      olock, nlock)) {
				break;
			}
			lwkt_process_ipiq();
			cpu_pause();
		}
		atomic_add_64(&pmap->pm_invgen, 1);
	}
}

static void
pmap_inval_done(pmap_t pmap)
{
	if (pmap != kernel_pmap) {
		atomic_add_64(&pmap->pm_invgen, 1);
		atomic_clear_int(&pmap->pm_active_lock, CPULOCK_EXCL);
	}
	crit_exit_id("inval");
}

#ifdef LOOPRECOVER

/*
 * Debugging and lost IPI recovery code.
 */
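
/*
 * loopwdog() returns non-zero when the current operation has run past its
 * tsc_target deadline.  The difference is cast to a signed type so the
 * test stays correct regardless of which side of the deadline the TSC is
 * on, and the deadline is re-armed with the shorter LOOPRECOVER_TIMEOUT2
 * interval so subsequent recovery attempts trigger more quickly.
 */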
static
__inline
int
loopwdog(struct pmap_inval_info *info)
{
	tsc_uclock_t tsc;

	tsc = rdtsc();
	if ((tsc_sclock_t)(info->tsc_target - tsc) < 0 && tsc_frequency) {
		info->tsc_target = tsc + (tsc_frequency * LOOPRECOVER_TIMEOUT2);
		return 1;
	}
	return 0;
}

static
void
loopdebug(const char *msg, pmap_inval_info_t *info)
{
	int p;
	int cpu = mycpu->gd_cpuid;

	/*
	 * Don't kprintf() anything if the pmap inval watchdog gets hit.
	 * DRM can cause an occasional watchdog hit (at least with a 1/16
	 * second watchdog), and attempting to kprintf to the KVM frame buffer
	 * from Xinvltlb, which ignores critical sections, can implode the
	 * system.
	 */
	if (pmap_inval_watchdog_print == 0)
		return;

	cpu_lfence();
#ifdef LOOPRECOVER
	atomic_add_long(&smp_smurf_mask.ary[0], 0);
#endif
	kprintf("ipilost-%s! %d mode=%d m=%08jx d=%08jx "
#ifdef LOOPRECOVER
		"s=%08jx "
#endif
#ifdef LOOPMASK_IN
		"in=%08jx "
#endif
#ifdef LOOPRECOVER
		"smurf=%08jx\n"
#endif
		, msg, cpu, info->mode,
		info->mask.ary[0],
		info->done.ary[0]
#ifdef LOOPRECOVER
		, info->sigmask.ary[0]
#endif
#ifdef LOOPMASK_IN
		, smp_in_mask.ary[0]
#endif
#ifdef LOOPRECOVER
		, smp_smurf_mask.ary[0]
#endif
		);
	kprintf("mdglob ");
	for (p = 0; p < ncpus; ++p)
		kprintf(" %d", CPU_prvspace[p]->mdglobaldata.gd_xinvaltlb);
	kprintf("\n");
}

#endif

#ifdef CHECKSIG

#define CHECKSIGMASK(info)	_checksigmask(info, __FILE__, __LINE__)

static
void
_checksigmask(pmap_inval_info_t *info, const char *file, int line)
{
	cpumask_t tmp;

	tmp = info->mask;
	CPUMASK_ANDMASK(tmp, info->sigmask);
	if (CPUMASK_CMPMASKNEQ(tmp, info->mask)) {
		kprintf("\"%s\" line %d: bad sig/mask %08jx %08jx\n",
			file, line, info->sigmask.ary[0], info->mask.ary[0]);
	}
}

#else

#define CHECKSIGMASK(info)

#endif

/*
 * Invalidate the specified va across all cpus associated with the pmap.
 * If va == (vm_offset_t)-1, we invltlb() instead of invlpg().  The operation
 * will be done fully synchronously with storing npte into *ptep and returning
 * opte.
 *
 * If ptep is NULL the operation will execute semi-synchronously.
 * ptep must be NULL if npgs > 1
 */
pt_entry_t
pmap_inval_smp(pmap_t pmap, vm_offset_t va, vm_pindex_t npgs,
	       pt_entry_t *ptep, pt_entry_t npte)
{
	globaldata_t gd = mycpu;
	pmap_inval_info_t *info;
	pt_entry_t opte = 0;
	int cpu = gd->gd_cpuid;
	cpumask_t tmpmask;
	unsigned long rflags;

	/*
	 * Initialize invalidation for pmap and enter critical section.
	 */
	if (pmap == NULL)
		pmap = kernel_pmap;

	/*
	 * Shortcut single-cpu case if possible.
	 */
	if (CPUMASK_CMPMASKEQ(pmap->pm_active, gd->gd_cpumask) &&
	    pmap_inval_force_nonopt == 0) {
		/*
		 * Convert to invltlb if there are too many pages to
		 * invlpg on.
		 */
		if (pmap->pm_flags & PMAP_MULTI)
			pmap_inval_init(pmap);
		if (npgs == 1) {
			if (ptep)
				opte = atomic_swap_long(ptep, npte);
			if (va == (vm_offset_t)-1)
				cpu_invltlb();
			else
				cpu_invlpg((void *)va);
		} else if (va == (vm_offset_t)-1 || npgs > MAX_INVAL_PAGES) {
			if (ptep) {
				while (npgs) {
					opte = atomic_swap_long(ptep, npte);
					++ptep;
					--npgs;
				}
			}
			cpu_invltlb();
		} else {
			while (npgs) {
				if (ptep) {
					opte = atomic_swap_long(ptep, npte);
					++ptep;
				}
				cpu_invlpg((void *)va);
				va += PAGE_SIZE;
				--npgs;
			}
		}
		if (pmap->pm_flags & PMAP_MULTI)
			pmap_inval_done(pmap);

		return opte;
	}

	/*
	 * We need a critical section to prevent getting preempted while
	 * we set up our command.  A preemption might execute its own
	 * pmap_inval*() command and create confusion below.
	 *
	 * tsc_target is our watchdog timeout that will attempt to recover
	 * from a lost IPI.  It is armed below with LOOPRECOVER_TIMEOUT1.
	 */
	pmap_inval_init(pmap);
	info = &invinfo[cpu];

	/*
	 * We must wait for other cpus which may still be finishing up a
	 * prior operation that we requested.
	 *
	 * We do not have to disable interrupts here.  An Xinvltlb can occur
	 * at any time (even within a critical section), but it will not
	 * act on our command until we set our done bits.
	 */
	while (CPUMASK_TESTNZERO(info->done)) {
#ifdef LOOPRECOVER
		if (loopwdog(info)) {
			info->failed = 1;
			loopdebug("A", info);
			/* XXX recover from possible bug */
			CPUMASK_ASSZERO(info->done);
		}
#endif
		cpu_pause();
	}
	KKASSERT(info->mode == INVDONE);
	cpu_mfence();

	/*
	 * Must set our cpu in the invalidation scan mask before
	 * any possibility of [partial] execution (remember, XINVLTLB
	 * can interrupt a critical section).
	 */
	ATOMIC_CPUMASK_ORBIT(smp_invmask, cpu);

	info->tsc_target = rdtsc() + (tsc_frequency * LOOPRECOVER_TIMEOUT1);
	info->va = va;
	info->npgs = npgs;
	info->ptep = ptep;
	info->npte = npte;
	info->opte = 0;
#ifdef LOOPRECOVER
	info->failed = 0;
#endif
	info->mode = INVSTORE;

	tmpmask = pmap->pm_active;	/* volatile (bits may be cleared) */
	if (pmap_inval_force_allcpus)
		tmpmask = smp_active_mask;
	cpu_ccfence();
	CPUMASK_ANDMASK(tmpmask, smp_active_mask);

	/*
	 * If ptep is NULL the operation can be semi-synchronous, which means
	 * we can improve performance by flagging and removing idle cpus
	 * (see the idleinvlclr function in mp_machdep.c).
	 *
	 * Kernel page table operations are typically semi-synchronous.
	 */
	if (ptep == NULL)
		smp_smurf_idleinvlclr(&tmpmask);
	CPUMASK_ORBIT(tmpmask, cpu);
	info->mask = tmpmask;

	/*
	 * The command may start executing the moment 'done' is initialized,
	 * so disable interrupts on the current cpu to prevent the 'done'
	 * field from changing (other cpus can't clear done bits until the
	 * originating cpu clears its mask bit, but other cpus CAN start
	 * clearing their mask bits).
	 */
#ifdef LOOPRECOVER
	info->sigmask = tmpmask;
	CHECKSIGMASK(info);
#endif
	cpu_sfence();
	rflags = read_rflags();
	cpu_disable_intr();

	ATOMIC_CPUMASK_COPY(info->done, tmpmask);
	/* execution can begin here on other cpus due to races */

	/*
	 * Pass our copy of the done bits (so they don't change out from
	 * under us) to generate the Xinvltlb interrupt on the targets.
	 *
	 * smp_invlpg() issues the command, synchronizes with other cpus,
	 * and executes the command on our cpu.  Upon return other cpus
	 * may still be in the process of exiting their synchronization.
	 */
	smp_invlpg(&tmpmask);
	opte = info->opte;
	KKASSERT(info->mode == INVDONE);

	/*
	 * Target cpus will be in their loop exiting concurrently with our
	 * cleanup.  They will not lose the bitmask they obtained before so
	 * we can safely clear this bit.
	 */
	ATOMIC_CPUMASK_NANDBIT(smp_invmask, cpu);
	write_rflags(rflags);
	pmap_inval_done(pmap);

	return opte;
}

/*
 * API function - invalidate the pte at (va) and replace *ptep with npte
 * atomically only if *ptep equals opte, across the pmap's active cpus.
 *
 * Returns 1 on success, 0 on failure (caller typically retries).
 */
int
pmap_inval_smp_cmpset(pmap_t pmap, vm_offset_t va, pt_entry_t *ptep,
		      pt_entry_t opte, pt_entry_t npte)
{
	globaldata_t gd = mycpu;
	pmap_inval_info_t *info;
	int success;
	int cpu = gd->gd_cpuid;
	cpumask_t tmpmask;
	unsigned long rflags;

	/*
	 * Initialize invalidation for pmap and enter critical section.
	 */
	if (pmap == NULL)
		pmap = kernel_pmap;

	/*
	 * Shortcut single-cpu case if possible.
	 */
	if (CPUMASK_CMPMASKEQ(pmap->pm_active, gd->gd_cpumask) &&
	    pmap_inval_force_nonopt == 0) {
		if (pmap->pm_flags & PMAP_MULTI)
			pmap_inval_init(pmap);
		if (atomic_cmpset_long(ptep, opte, npte)) {
			if (va == (vm_offset_t)-1)
				cpu_invltlb();
			else
				cpu_invlpg((void *)va);
			if (pmap->pm_flags & PMAP_MULTI)
				pmap_inval_done(pmap);
			return 1;
		} else {
			if (pmap->pm_flags & PMAP_MULTI)
				pmap_inval_done(pmap);
			return 0;
		}
	}

	/*
	 * We need a critical section to prevent getting preempted while
	 * we set up our command.  A preemption might execute its own
	 * pmap_inval*() command and create confusion below.
	 */
	pmap_inval_init(pmap);
	info = &invinfo[cpu];

	/*
	 * We must wait for other cpus which may still be finishing
	 * up a prior operation.
	 */
	while (CPUMASK_TESTNZERO(info->done)) {
#ifdef LOOPRECOVER
		if (loopwdog(info)) {
			info->failed = 1;
			loopdebug("B", info);
			/* XXX recover from possible bug */
			CPUMASK_ASSZERO(info->done);
		}
#endif
		cpu_pause();
	}
	KKASSERT(info->mode == INVDONE);
	cpu_mfence();

	/*
	 * Must set our cpu in the invalidation scan mask before
	 * any possibility of [partial] execution (remember, XINVLTLB
	 * can interrupt a critical section).
	 */
	ATOMIC_CPUMASK_ORBIT(smp_invmask, cpu);

	info->tsc_target = rdtsc() + (tsc_frequency * LOOPRECOVER_TIMEOUT1);
	info->va = va;
	info->npgs = 1;			/* unused */
	info->ptep = ptep;
	info->npte = npte;
	info->opte = opte;
#ifdef LOOPRECOVER
	info->failed = 0;
#endif
	info->mode = INVCMPSET;
	info->success = 0;

	tmpmask = pmap->pm_active;	/* volatile */
	if (pmap_inval_force_allcpus)
		tmpmask = smp_active_mask;
	cpu_ccfence();
	CPUMASK_ANDMASK(tmpmask, smp_active_mask);
	CPUMASK_ORBIT(tmpmask, cpu);
	info->mask = tmpmask;

	/*
	 * The command may start executing the moment 'done' is initialized,
	 * so disable interrupts on the current cpu to prevent the 'done'
	 * field from changing (other cpus can't clear done bits until the
	 * originating cpu clears its mask bit).
	 */
#ifdef LOOPRECOVER
	info->sigmask = tmpmask;
	CHECKSIGMASK(info);
#endif
	cpu_sfence();
	rflags = read_rflags();
	cpu_disable_intr();

	ATOMIC_CPUMASK_COPY(info->done, tmpmask);

	/*
	 * Pass our copy of the done bits (so they don't change out from
	 * under us) to generate the Xinvltlb interrupt on the targets.
	 */
	smp_invlpg(&tmpmask);
	success = info->success;
	KKASSERT(info->mode == INVDONE);

	ATOMIC_CPUMASK_NANDBIT(smp_invmask, cpu);
	write_rflags(rflags);
	pmap_inval_done(pmap);

	return success;
}
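
/*
 * Typical calling pattern for pmap_inval_smp_cmpset(), shown as a rough
 * sketch only (the variable names and the bit mask being cleared here are
 * hypothetical, not lifted from pmap.c):
 *
 *	pt_entry_t opte, npte;
 *
 *	do {
 *		opte = *ptep;
 *		npte = opte & ~clear_mask;
 *	} while (pmap_inval_smp_cmpset(pmap, va, ptep, opte, npte) == 0);
 *
 * A zero return means *ptep no longer matched opte, so the caller
 * recomputes npte from a freshly read opte and retries, per the
 * "caller typically retries" contract documented above.
 */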

void
pmap_inval_bulk_init(pmap_inval_bulk_t *bulk, struct pmap *pmap)
{
	bulk->pmap = pmap;
	bulk->va_beg = 0;
	bulk->va_end = 0;
	bulk->count = 0;
}

pt_entry_t
pmap_inval_bulk(pmap_inval_bulk_t *bulk, vm_offset_t va,
		pt_entry_t *ptep, pt_entry_t npte)
{
	pt_entry_t pte;

	/*
	 * Degenerate case, localized or we don't care (e.g. because we
	 * are jacking the entire page table) or the pmap is not in-use
	 * by anyone.  No invalidations are done on any cpu.
	 */
	if (bulk == NULL) {
		pte = atomic_swap_long(ptep, npte);
		return pte;
	}

	/*
	 * If it isn't the kernel pmap we execute the operation synchronously
	 * on all cpus belonging to the pmap, which avoids concurrency bugs in
	 * the hw related to changing pte's out from under threads.
	 *
	 * Eventually I would like to implement streaming pmap invalidation
	 * for user pmaps to reduce mmap/munmap overheads for heavily-loaded
	 * threaded programs.
	 */
	if (bulk->pmap != kernel_pmap) {
		pte = pmap_inval_smp(bulk->pmap, va, 1, ptep, npte);
		return pte;
	}

	/*
	 * This is the kernel_pmap.  All unmap operations presume that there
	 * are no other cpus accessing the addresses in question.  Implement
	 * the bulking algorithm: collect the required information and
	 * synchronize once at the end.
	 */
	pte = atomic_swap_long(ptep, npte);
	if (va == (vm_offset_t)-1) {
		bulk->va_beg = va;
	} else if (bulk->va_beg == bulk->va_end) {
		bulk->va_beg = va;
		bulk->va_end = va + PAGE_SIZE;
	} else if (va == bulk->va_end) {
		bulk->va_end = va + PAGE_SIZE;
	} else {
		bulk->va_beg = (vm_offset_t)-1;
		bulk->va_end = 0;
#if 0
		pmap_inval_bulk_flush(bulk);
		bulk->count = 1;
		if (va == (vm_offset_t)-1) {
			bulk->va_beg = va;
			bulk->va_end = 0;
		} else {
			bulk->va_beg = va;
			bulk->va_end = va + PAGE_SIZE;
		}
#endif
	}
	++bulk->count;

	return pte;
}

void
pmap_inval_bulk_flush(pmap_inval_bulk_t *bulk)
{
	if (bulk == NULL)
		return;
	if (bulk->va_beg != bulk->va_end) {
		if (bulk->va_beg == (vm_offset_t)-1) {
			pmap_inval_smp(bulk->pmap, bulk->va_beg, 1, NULL, 0);
		} else {
			vm_pindex_t n;

			n = (bulk->va_end - bulk->va_beg) >> PAGE_SHIFT;
			pmap_inval_smp(bulk->pmap, bulk->va_beg, n, NULL, 0);
		}
	}
	bulk->va_beg = 0;
	bulk->va_end = 0;
	bulk->count = 0;
}
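
/*
 * Typical bulk API usage, sketched for illustration only (the range
 * bounds and the pte lookup helper are hypothetical):
 *
 *	pmap_inval_bulk_t bulk;
 *	pt_entry_t *ptep;
 *	vm_offset_t va;
 *
 *	pmap_inval_bulk_init(&bulk, kernel_pmap);
 *	for (va = sva; va < eva; va += PAGE_SIZE) {
 *		ptep = pmap_pte(kernel_pmap, va);
 *		pmap_inval_bulk(&bulk, va, ptep, 0);
 *	}
 *	pmap_inval_bulk_flush(&bulk);
 *
 * Contiguous ranges are accumulated and invalidated with a single
 * semi-synchronous pmap_inval_smp() call at flush time, degrading to a
 * full TLB invalidation if the accumulated range becomes discontiguous.
 */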

/*
 * Called from the Xinvltlb vector with a critical section held and
 * interrupts enabled.
 */
int
pmap_inval_intr(cpumask_t *cpumaskp, int toolong)
{
	globaldata_t gd = mycpu;
	pmap_inval_info_t *info;
	int loopme = 0;
	int cpu;
	cpumask_t cpumask;

	/*
	 * Check all cpus for invalidations we may need to service.
	 */
	cpu_ccfence();
	cpu = gd->gd_cpuid;
	cpumask = *cpumaskp;

	while (CPUMASK_TESTNZERO(cpumask)) {
		int n = BSFCPUMASK(cpumask);

#ifdef LOOPRECOVER
		KKASSERT(n >= 0 && n < MAXCPU);
#endif

		CPUMASK_NANDBIT(cpumask, n);
		info = &invinfo[n];

		/*
		 * Check out cpu (cpu) for work in the target cpu info (n)
		 *
		 * if (n == cpu) - check our cpu for a master operation
		 * if (n != cpu) - check other cpus for a slave operation
		 *
		 * Due to interrupts/races we can catch a new operation
		 * in an older interrupt on other cpus.
		 *
		 * A fence is needed once we detect the (not) done bit.
		 */
		if (!CPUMASK_TESTBIT(info->done, cpu))
			continue;
		cpu_lfence();
#ifdef LOOPRECOVER
		if (toolong) {
			kprintf("pm_inval_intr: WARNING, taking too long "
				"cpus=%d->%d done=%08jx mask=%08jx "
				"mode=%d\n",
				cpu, n, info->done.ary[0], info->mask.ary[0],
				info->mode);
		}
#endif

		/*
		 * info->mask and info->done always contain the originating
		 * cpu until the originator is done.  Targets may still be
		 * present in info->done after the originator is done (they
		 * will be finishing up their loops).
		 *
		 * Clear info->mask bits on other cpus to indicate that they
		 * have quiesced (entered the loop).  Once the other mask bits
		 * are clear we can execute the operation on the originator,
		 * then clear the mask and done bits on the originator.  The
		 * targets will then finish up their side and clear their
		 * done bits.
		 *
		 * The command is considered 100% done when all done bits have
		 * been cleared.
		 */
		if (n != cpu) {
			/*
			 * Command state machine for 'other' cpus.
			 */
			if (CPUMASK_TESTBIT(info->mask, cpu)) {
				/*
				 * Other cpus indicate to originator that they
				 * are quiesced.
				 */
				ATOMIC_CPUMASK_NANDBIT(info->mask, cpu);
				loopme = 1;
			} else if (info->ptep &&
				   CPUMASK_TESTBIT(info->mask, n)) {
				/*
				 * Other cpu must wait for the originator (n)
				 * to complete its command if ptep is not NULL.
				 */
				loopme = 1;
			} else {
				/*
				 * Other cpu detects that the originator has
				 * completed its command, or there was no
				 * command.
				 *
				 * Now that the page table entry has changed,
				 * we can follow up with our own invalidation.
				 */
				vm_offset_t va = info->va;
				vm_pindex_t npgs;

				if (va == (vm_offset_t)-1 ||
				    info->npgs > MAX_INVAL_PAGES) {
					cpu_invltlb();
				} else {
					for (npgs = info->npgs; npgs; --npgs) {
						cpu_invlpg((void *)va);
						va += PAGE_SIZE;
					}
				}
				ATOMIC_CPUMASK_NANDBIT(info->done, cpu);
				/* info invalid now */
				/* loopme left alone */
			}
		} else if (CPUMASK_TESTBIT(info->mask, cpu)) {
			/*
			 * Originator is waiting for other cpus
			 */
			if (CPUMASK_CMPMASKNEQ(info->mask, gd->gd_cpumask)) {
				/*
				 * Originator waits for other cpus to enter
				 * their loop (aka quiesce).
				 *
				 * If this bugs out the IPI may have been lost,
				 * try to reissue by resetting our own
				 * reentrancy bit and clearing the smurf mask
				 * for the cpus that did not respond, then
				 * reissuing the IPI.
				 */
				loopme = 1;
#ifdef LOOPRECOVER
				if (loopwdog(info)) {
					info->failed = 1;
					loopdebug("C", info);
					/* XXX recover from possible bug */
					cpu_disable_intr();
					ATOMIC_CPUMASK_NANDMASK(smp_smurf_mask,
								info->mask);
					smp_invlpg(&smp_active_mask);

					/*
					 * Force outer-loop retest of Xinvltlb
					 * requests (see mp_machdep.c).
					 */
					cpu_enable_intr();
				}
#endif
			} else {
				/*
				 * Originator executes operation and clears
				 * mask to allow other cpus to finish.
				 */
				KKASSERT(info->mode != INVDONE);
				if (info->mode == INVSTORE) {
					if (info->ptep)
						info->opte = atomic_swap_long(info->ptep, info->npte);
					CHECKSIGMASK(info);
					ATOMIC_CPUMASK_NANDBIT(info->mask, cpu);
					CHECKSIGMASK(info);
				} else {
					if (atomic_cmpset_long(info->ptep,
							      info->opte, info->npte)) {
						info->success = 1;
					} else {
						info->success = 0;
					}
					CHECKSIGMASK(info);
					ATOMIC_CPUMASK_NANDBIT(info->mask, cpu);
					CHECKSIGMASK(info);
				}
				loopme = 1;
			}
		} else {
			/*
			 * Originator does not have to wait for the other
			 * cpus to finish.  It clears its done bit.  A new
			 * command will not be initiated by the originator
			 * until the other cpus have cleared their done bits
			 * (asynchronously).
			 */
			vm_offset_t va = info->va;
			vm_pindex_t npgs;

			if (va == (vm_offset_t)-1 ||
			    info->npgs > MAX_INVAL_PAGES) {
				cpu_invltlb();
			} else {
				for (npgs = info->npgs; npgs; --npgs) {
					cpu_invlpg((void *)va);
					va += PAGE_SIZE;
				}
			}

			/* leave loopme alone */
			/* other cpus may still be finishing up */
			/* can't race originator since that's us */
			info->mode = INVDONE;
			ATOMIC_CPUMASK_NANDBIT(info->done, cpu);
		}
	}
	return loopme;
}