xref: /dragonfly/sys/kern/kern_spinlock.c (revision 6d0742ae)
1b1af91cbSJeffrey Hsu /*
2b1af91cbSJeffrey Hsu  * Copyright (c) 2005 Jeffrey M. Hsu.  All rights reserved.
3b1af91cbSJeffrey Hsu  *
4b1af91cbSJeffrey Hsu  * This code is derived from software contributed to The DragonFly Project
5d666840aSMatthew Dillon  * by Jeffrey M. Hsu. and Matthew Dillon
6b1af91cbSJeffrey Hsu  *
7b1af91cbSJeffrey Hsu  * Redistribution and use in source and binary forms, with or without
8b1af91cbSJeffrey Hsu  * modification, are permitted provided that the following conditions
9b1af91cbSJeffrey Hsu  * are met:
10b1af91cbSJeffrey Hsu  * 1. Redistributions of source code must retain the above copyright
11b1af91cbSJeffrey Hsu  *    notice, this list of conditions and the following disclaimer.
12b1af91cbSJeffrey Hsu  * 2. Redistributions in binary form must reproduce the above copyright
13b1af91cbSJeffrey Hsu  *    notice, this list of conditions and the following disclaimer in the
14b1af91cbSJeffrey Hsu  *    documentation and/or other materials provided with the distribution.
15b1af91cbSJeffrey Hsu  * 3. Neither the name of The DragonFly Project nor the names of its
16b1af91cbSJeffrey Hsu  *    contributors may be used to endorse or promote products derived
17b1af91cbSJeffrey Hsu  *    from this software without specific, prior written permission.
18b1af91cbSJeffrey Hsu  *
19b1af91cbSJeffrey Hsu  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20b1af91cbSJeffrey Hsu  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21b1af91cbSJeffrey Hsu  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22b1af91cbSJeffrey Hsu  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
23b1af91cbSJeffrey Hsu  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24b1af91cbSJeffrey Hsu  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25b1af91cbSJeffrey Hsu  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26b1af91cbSJeffrey Hsu  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27b1af91cbSJeffrey Hsu  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28b1af91cbSJeffrey Hsu  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29b1af91cbSJeffrey Hsu  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30b1af91cbSJeffrey Hsu  * SUCH DAMAGE.
31b12defdcSMatthew Dillon  */
32b1af91cbSJeffrey Hsu 
330846e4ceSMatthew Dillon /*
340846e4ceSMatthew Dillon  * The implementation is designed to avoid looping when compatible operations
350846e4ceSMatthew Dillon  * are executed.
360846e4ceSMatthew Dillon  *
37d033fb32SMatthew Dillon  * To acquire a spinlock we first increment lock.  Then we check if lock
380846e4ceSMatthew Dillon  * meets our requirements.  For an exclusive spinlock it must be 1, for a
390846e4ceSMatthew Dillon  * shared spinlock it must either be 1 or the SPINLOCK_SHARED bit must be set.
400846e4ceSMatthew Dillon  *
410846e4ceSMatthew Dillon  * Shared spinlock failure case: Decrement the count, loop until we can
420846e4ceSMatthew Dillon  * transition from 0 to SPINLOCK_SHARED|1, or until we find SPINLOCK_SHARED
430846e4ceSMatthew Dillon  * is set and increment the count.
440846e4ceSMatthew Dillon  *
450846e4ceSMatthew Dillon  * Exclusive spinlock failure case: While maintaining the count, clear the
460846e4ceSMatthew Dillon  * SPINLOCK_SHARED flag unconditionally.  Then use an atomic add to transfer
47d033fb32SMatthew Dillon  * the count from the low bits to the high bits of lock.  Then loop until
480846e4ceSMatthew Dillon  * all low bits are 0.  Once the low bits drop to 0 we can transfer the
490846e4ceSMatthew Dillon  * count back with an atomic_cmpset_int(), atomically, and return.
500846e4ceSMatthew Dillon  */
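
/*
 * Illustrative sketch, not compiled and not part of the implementation:
 * a simplified model of the inline fast path described above, using the
 * kernel's atomic_fetchadd_int() primitive and the contested-path function
 * defined later in this file.  The real inlines live in <sys/spinlock2.h>
 * and also manage the critical section; this fragment only shows the
 * increment-then-check idea.
 */
#if 0
static __inline void
example_spin_lock_excl(struct spinlock *spin)
{
	uint32_t value;

	/*
	 * Optimistically bump the low-bit hold count.  The returned value
	 * is the pre-increment lock word; 0 means we now hold the lock
	 * exclusively (lock == 1).
	 */
	value = atomic_fetchadd_int(&spin->lock, 1);
	if (value != 0) {
		/* contested: resolve in the slow path below */
		_spin_lock_contested(spin, "example", value);
	}
}
#endif
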
51b1af91cbSJeffrey Hsu #include <sys/param.h>
52b02926deSMatthew Dillon #include <sys/systm.h>
53b1af91cbSJeffrey Hsu #include <sys/types.h>
54b02926deSMatthew Dillon #include <sys/kernel.h>
55b02926deSMatthew Dillon #include <sys/sysctl.h>
56b02926deSMatthew Dillon #ifdef INVARIANTS
57b02926deSMatthew Dillon #include <sys/proc.h>
58b02926deSMatthew Dillon #endif
592b3f93eaSMatthew Dillon #include <sys/caps.h>
60b1af91cbSJeffrey Hsu #include <machine/atomic.h>
61b12defdcSMatthew Dillon #include <machine/cpu.h>
62b02926deSMatthew Dillon #include <machine/cpufunc.h>
6310c66d57SSepherosa Ziehau #include <machine/specialreg.h>
64b02926deSMatthew Dillon #include <machine/clock.h>
655b49787bSMatthew Dillon #include <sys/indefinite2.h>
66b1af91cbSJeffrey Hsu #include <sys/spinlock.h>
6735a832dfSMatthew Dillon #include <sys/spinlock2.h>
6857aa743cSMatthew Dillon #include <sys/ktr.h>
69b1af91cbSJeffrey Hsu 
70ba87a4abSSascha Wildner struct spinlock pmap_spin = SPINLOCK_INITIALIZER(pmap_spin, "pmap_spin");
71492d98e6SMatthew Dillon 
7257aa743cSMatthew Dillon /*
7357aa743cSMatthew Dillon  * Kernel Trace
7457aa743cSMatthew Dillon  */
7557aa743cSMatthew Dillon #if !defined(KTR_SPIN_CONTENTION)
7657aa743cSMatthew Dillon #define KTR_SPIN_CONTENTION	KTR_ALL
7757aa743cSMatthew Dillon #endif
7857aa743cSMatthew Dillon #define SPIN_STRING	"spin=%p type=%c"
7957aa743cSMatthew Dillon #define SPIN_ARG_SIZE	(sizeof(void *) + sizeof(int))
8057aa743cSMatthew Dillon 
8157aa743cSMatthew Dillon KTR_INFO_MASTER(spin);
8243e72e79SMatthew Dillon #if 0
8357aa743cSMatthew Dillon KTR_INFO(KTR_SPIN_CONTENTION, spin, beg, 0, SPIN_STRING, SPIN_ARG_SIZE);
8457aa743cSMatthew Dillon KTR_INFO(KTR_SPIN_CONTENTION, spin, end, 1, SPIN_STRING, SPIN_ARG_SIZE);
8543e72e79SMatthew Dillon #endif
8657aa743cSMatthew Dillon 
87b12defdcSMatthew Dillon #define logspin(name, spin, type)			\
88b12defdcSMatthew Dillon 	KTR_LOG(spin_ ## name, spin, type)
8910c66d57SSepherosa Ziehau 
90b02926deSMatthew Dillon #ifdef INVARIANTS
91b02926deSMatthew Dillon static int spin_lock_test_mode;
92b02926deSMatthew Dillon #endif
93b02926deSMatthew Dillon 
94bb4ae18cSMatthew Dillon #ifdef DEBUG_LOCKS_LATENCY
95bb4ae18cSMatthew Dillon 
96288f331fSMatthew Dillon __read_frequently static long spinlocks_add_latency;
97bb4ae18cSMatthew Dillon SYSCTL_LONG(_debug, OID_AUTO, spinlocks_add_latency, CTLFLAG_RW,
98bb4ae18cSMatthew Dillon     &spinlocks_add_latency, 0,
99bb4ae18cSMatthew Dillon     "Add spinlock latency");
100bb4ae18cSMatthew Dillon 
101bb4ae18cSMatthew Dillon #endif
102bb4ae18cSMatthew Dillon 
103288f331fSMatthew Dillon __read_frequently static long spin_backoff_max = 4096;
104cc705b82SMatthew Dillon SYSCTL_LONG(_debug, OID_AUTO, spin_backoff_max, CTLFLAG_RW,
105cc705b82SMatthew Dillon     &spin_backoff_max, 0,
106cc705b82SMatthew Dillon     "Spinlock exponential backoff limit");
107288f331fSMatthew Dillon 
108288f331fSMatthew Dillon /* 1 << n clock cycles, approx */
109288f331fSMatthew Dillon __read_frequently static long spin_window_shift = 8;
110cc705b82SMatthew Dillon SYSCTL_LONG(_debug, OID_AUTO, spin_window_shift, CTLFLAG_RW,
111cc705b82SMatthew Dillon     &spin_window_shift, 0,
112cc705b82SMatthew Dillon     "Spinlock TSC windowing");
113cc705b82SMatthew Dillon 
1144badc135SMatthew Dillon __read_frequently int indefinite_uses_rdtsc = 1;
1154badc135SMatthew Dillon SYSCTL_INT(_debug, OID_AUTO, indefinite_uses_rdtsc, CTLFLAG_RW,
1164badc135SMatthew Dillon     &indefinite_uses_rdtsc, 0,
1174badc135SMatthew Dillon     "Indefinite code uses RDTSC");
1184badc135SMatthew Dillon 
119b02926deSMatthew Dillon /*
1208f165b8cSMatthew Dillon  * We contested due to another exclusive lock holder.  We lose.
121b12defdcSMatthew Dillon  *
122b12defdcSMatthew Dillon  * We have to unwind the attempt and may acquire the spinlock
12301be7a8fSMatthew Dillon  * anyway while doing so.
124b02926deSMatthew Dillon  */
125d666840aSMatthew Dillon int
126b12defdcSMatthew Dillon spin_trylock_contested(struct spinlock *spin)
127b02926deSMatthew Dillon {
128b12defdcSMatthew Dillon 	globaldata_t gd = mycpu;
129b12defdcSMatthew Dillon 
13001be7a8fSMatthew Dillon 	/*
13101be7a8fSMatthew Dillon 	 * Handle degenerate case, else fail.
13201be7a8fSMatthew Dillon 	 */
133d033fb32SMatthew Dillon 	if (atomic_cmpset_int(&spin->lock, SPINLOCK_SHARED|0, 1))
13401be7a8fSMatthew Dillon 		return TRUE;
135d033fb32SMatthew Dillon 	/*atomic_add_int(&spin->lock, -1);*/
1360846e4ceSMatthew Dillon 	--gd->gd_spinlocks;
137e8b1691fSMatthew Dillon 	crit_exit_quick(gd->gd_curthread);
138a4d95680SMatthew Dillon 
139d666840aSMatthew Dillon 	return (FALSE);
140d666840aSMatthew Dillon }
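
/*
 * Illustrative sketch, not compiled: caller-side use of the trylock path.
 * spin_trylock()/spin_unlock() are the <sys/spinlock2.h> inlines; the
 * contested function above is only reached when the inline's optimistic
 * cmpset of 0 -> 1 fails.
 */
#if 0
static void
example_trylock_usage(struct spinlock *spin)
{
	if (spin_trylock(spin)) {
		/* acquired exclusively without spinning */
		/* ... short critical section ... */
		spin_unlock(spin);
	} else {
		/* lock was busy; the attempt was unwound for us */
	}
}
#endif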
141d666840aSMatthew Dillon 
142d666840aSMatthew Dillon /*
14301be7a8fSMatthew Dillon  * The spin_lock() inline was unable to acquire the lock and calls this
144d033fb32SMatthew Dillon  * function with spin->lock already incremented, passing (spin->lock - 1)
145e22f2acdSMatthew Dillon  * to the function (the result of the inline's fetchadd).
146d666840aSMatthew Dillon  *
1471b8fb8d2SMatthew Dillon  * Note that we implement both exclusive and shared spinlocks, so we cannot
1481b8fb8d2SMatthew Dillon  * use atomic_swap_int().  Instead, we try to use atomic_fetchadd_int()
1491b8fb8d2SMatthew Dillon  * to put most of the burden on the cpu.  Atomic_cmpset_int() (cmpxchg)
1501b8fb8d2SMatthew Dillon  * can cause a lot of unnecessary looping in situations where it is just
1511b8fb8d2SMatthew Dillon  * trying to increment the count.
15243e72e79SMatthew Dillon  *
1531b8fb8d2SMatthew Dillon  * Similarly, we leave the SHARED flag intact and incur slightly more
1541b8fb8d2SMatthew Dillon  * overhead when switching from shared to exclusive.  This allows us to
1551b8fb8d2SMatthew Dillon  * use atomic_fetchadd_int() for both spinlock types in the critical
1561b8fb8d2SMatthew Dillon  * path.
1570846e4ceSMatthew Dillon  *
158cc705b82SMatthew Dillon  * The exponential (n^1.5) backoff algorithm is designed to both reduce
159cc705b82SMatthew Dillon  * cache bus contention between cpu cores and sockets, and to allow some
160cc705b82SMatthew Dillon  * bursting of exclusive locks in heavily contended situations to improve
161cc705b82SMatthew Dillon  * performance.
162cc705b82SMatthew Dillon  *
163cc705b82SMatthew Dillon  * The exclusive lock priority mechanism prevents even heavily contended
164cc705b82SMatthew Dillon  * exclusive locks from being starved by shared locks.
165d666840aSMatthew Dillon  */
166d666840aSMatthew Dillon void
167cff27badSMatthew Dillon _spin_lock_contested(struct spinlock *spin, const char *ident, int value)
168d666840aSMatthew Dillon {
169b1793cc6SMatthew Dillon 	indefinite_info_t info;
170ae4025a1SMatthew Dillon 	uint32_t ovalue;
171cc705b82SMatthew Dillon 	long expbackoff;
172cc705b82SMatthew Dillon 	long loop;
173d666840aSMatthew Dillon 
1740846e4ceSMatthew Dillon 	/*
175e22f2acdSMatthew Dillon 	 * WARNING! Caller has already incremented the lock.  We must
176e22f2acdSMatthew Dillon 	 *	    increment the count value (from the inline's fetch-add)
177e22f2acdSMatthew Dillon 	 *	    to match.
178e22f2acdSMatthew Dillon 	 *
179e22f2acdSMatthew Dillon 	 * Handle the degenerate case where the spinlock is flagged SHARED
180e22f2acdSMatthew Dillon 	 * with only our reference.  We can convert it to EXCLUSIVE.
18101be7a8fSMatthew Dillon 	 */
182cc705b82SMatthew Dillon 	if (value == (SPINLOCK_SHARED | 1) - 1) {
183d033fb32SMatthew Dillon 		if (atomic_cmpset_int(&spin->lock, SPINLOCK_SHARED | 1, 1))
18401be7a8fSMatthew Dillon 			return;
185cff27badSMatthew Dillon 	}
186cc705b82SMatthew Dillon 	/* ++value; value not used after this */
187cc705b82SMatthew Dillon 	info.type = 0;		/* avoid improper gcc warning */
188cc705b82SMatthew Dillon 	info.ident = NULL;	/* avoid improper gcc warning */
1894badc135SMatthew Dillon 	info.secs = 0;		/* avoid improper gcc warning */
1904badc135SMatthew Dillon 	info.base = 0;		/* avoid improper gcc warning */
1912a404fe0Szrj 	info.count = 0;		/* avoid improper gcc warning */
192cc705b82SMatthew Dillon 	expbackoff = 0;
19301be7a8fSMatthew Dillon 
19401be7a8fSMatthew Dillon 	/*
195e22f2acdSMatthew Dillon 	 * Transfer our exclusive request to the high bits and clear the
196e22f2acdSMatthew Dillon 	 * SPINLOCK_SHARED bit if it was set.  This makes the spinlock
197e22f2acdSMatthew Dillon 	 * appear exclusive, preventing any NEW shared or exclusive
198e22f2acdSMatthew Dillon 	 * spinlocks from being obtained while we wait for existing
199e22f2acdSMatthew Dillon 	 * shared or exclusive holders to unlock.
20079a7c522SMatthew Dillon 	 *
201e22f2acdSMatthew Dillon 	 * Don't tread on earlier exclusive waiters by stealing the lock
202e22f2acdSMatthew Dillon 	 * away early if the low bits happen to now be 1.
203e22f2acdSMatthew Dillon 	 *
204e22f2acdSMatthew Dillon 	 * The shared unlock understands that this may occur.
2050846e4ceSMatthew Dillon 	 */
206d033fb32SMatthew Dillon 	ovalue = atomic_fetchadd_int(&spin->lock, SPINLOCK_EXCLWAIT - 1);
207ae4025a1SMatthew Dillon 	ovalue += SPINLOCK_EXCLWAIT - 1;
2089abb66c5SMatthew Dillon 	if (ovalue & SPINLOCK_SHARED) {
209d033fb32SMatthew Dillon 		atomic_clear_int(&spin->lock, SPINLOCK_SHARED);
210ae4025a1SMatthew Dillon 		ovalue &= ~SPINLOCK_SHARED;
211ae4025a1SMatthew Dillon 	}
2120846e4ceSMatthew Dillon 
2130846e4ceSMatthew Dillon 	for (;;) {
214cc705b82SMatthew Dillon 		expbackoff = (expbackoff + 1) * 3 / 2;
215cc705b82SMatthew Dillon 		if (expbackoff == 6)		/* 1, 3, 6, 10, ... */
216*6d0742aeSMatthew Dillon 			indefinite_init(&info, spin, ident, 0, 'S');
2174badc135SMatthew Dillon 		if (indefinite_uses_rdtsc) {
218cc705b82SMatthew Dillon 			if ((rdtsc() >> spin_window_shift) % ncpus != mycpuid)  {
219cc705b82SMatthew Dillon 				for (loop = expbackoff; loop; --loop)
220cc705b82SMatthew Dillon 					cpu_pause();
221cc705b82SMatthew Dillon 			}
2224badc135SMatthew Dillon 		}
223cc705b82SMatthew Dillon 		/*cpu_lfence();*/
224cc705b82SMatthew Dillon 
2250846e4ceSMatthew Dillon 		/*
2260846e4ceSMatthew Dillon 		 * If the low bits are zero, try to acquire the exclusive lock
227e22f2acdSMatthew Dillon 		 * by transferring our high bit reservation to the low bits.
2280846e4ceSMatthew Dillon 		 *
229cc705b82SMatthew Dillon 		 * NOTE: Avoid unconditional atomic op by testing ovalue,
230cc705b82SMatthew Dillon 		 *	 otherwise we get cache bus armageddon.
231c5cfe2c8SMatthew Dillon 		 *
232c5cfe2c8SMatthew Dillon 		 * NOTE: We must also ensure that the SHARED bit is cleared.
233c5cfe2c8SMatthew Dillon 		 *	 It is possible for it to wind up being set on a
234c5cfe2c8SMatthew Dillon 		 *	 shared lock override of the EXCLWAIT bits.
2350846e4ceSMatthew Dillon 		 */
236d033fb32SMatthew Dillon 		ovalue = spin->lock;
2370846e4ceSMatthew Dillon 		cpu_ccfence();
238ae4025a1SMatthew Dillon 		if ((ovalue & (SPINLOCK_EXCLWAIT - 1)) == 0) {
239a18b747cSMatthew Dillon 			uint32_t nvalue;
240a18b747cSMatthew Dillon 
241a18b747cSMatthew Dillon 			nvalue = ((ovalue - SPINLOCK_EXCLWAIT) | 1) &
242a18b747cSMatthew Dillon 				~SPINLOCK_SHARED;
243d033fb32SMatthew Dillon 			if (atomic_fcmpset_int(&spin->lock, &ovalue, nvalue))
2440846e4ceSMatthew Dillon 				break;
245ae4025a1SMatthew Dillon 			continue;
246ae4025a1SMatthew Dillon 		}
247cc705b82SMatthew Dillon 		if (expbackoff > 6 + spin_backoff_max)
248cc705b82SMatthew Dillon 			expbackoff = 6 + spin_backoff_max;
249cc705b82SMatthew Dillon 		if (expbackoff >= 6) {
250b1793cc6SMatthew Dillon 			if (indefinite_check(&info))
2510846e4ceSMatthew Dillon 				break;
2520846e4ceSMatthew Dillon 		}
253cc705b82SMatthew Dillon 	}
254cc705b82SMatthew Dillon 	if (expbackoff >= 6)
255b1793cc6SMatthew Dillon 		indefinite_done(&info);
2560846e4ceSMatthew Dillon }
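
/*
 * Illustrative note: the (expbackoff + 1) * 3 / 2 recurrence above grows
 * roughly as n^1.5 and, clamped at 6 + spin_backoff_max, produces the pause
 * counts 1, 3, 6, 10, 16, 25, 39, 60, ...  The fragment below (not compiled,
 * not part of the implementation) prints the progression in isolation.
 */
#if 0
static void
example_backoff_progression(void)
{
	long expbackoff = 0;
	int step;

	for (step = 0; step < 8; ++step) {
		expbackoff = (expbackoff + 1) * 3 / 2;
		kprintf("step %d: %ld cpu_pause() iterations\n",
			step, expbackoff);
	}
}
#endif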
2570846e4ceSMatthew Dillon 
2580846e4ceSMatthew Dillon /*
259e22f2acdSMatthew Dillon  * The spin_lock_shared() inline was unable to acquire the lock and calls
260d033fb32SMatthew Dillon  * this function with spin->lock already incremented.
26179a7c522SMatthew Dillon  *
262e22f2acdSMatthew Dillon  * This is not in the critical path unless there is contention between
263e22f2acdSMatthew Dillon  * shared and exclusive holders.
264cc705b82SMatthew Dillon  *
265cc705b82SMatthew Dillon  * Exclusive locks have priority over shared locks.  However, this can
266cc705b82SMatthew Dillon  * cause shared locks to be starved when large numbers of threads are
267cc705b82SMatthew Dillon  * competing for exclusive locks so the shared lock code uses TSC-windowing
268cc705b82SMatthew Dillon  * to selectively ignore the exclusive priority mechanism.  This has the
269cc705b82SMatthew Dillon  * effect of allowing a limited number of shared locks to compete against
270cc705b82SMatthew Dillon  * exclusive waiters at any given moment.
271cc705b82SMatthew Dillon  *
272cc705b82SMatthew Dillon  * Note that shared locks do not implement exponential backoff.  Instead,
273cc705b82SMatthew Dillon  * the shared lock simply polls the lock value.  One cpu_pause() is built
274cc705b82SMatthew Dillon  * into indefinite_check().
2750846e4ceSMatthew Dillon  */
2760846e4ceSMatthew Dillon void
277e22f2acdSMatthew Dillon _spin_lock_shared_contested(struct spinlock *spin, const char *ident)
2780846e4ceSMatthew Dillon {
279b1793cc6SMatthew Dillon 	indefinite_info_t info;
280ae4025a1SMatthew Dillon 	uint32_t ovalue;
2815b49787bSMatthew Dillon 
282e22f2acdSMatthew Dillon 	/*
283e22f2acdSMatthew Dillon 	 * Undo the inline's increment.
284e22f2acdSMatthew Dillon 	 */
285d033fb32SMatthew Dillon 	ovalue = atomic_fetchadd_int(&spin->lock, -1) - 1;
286e22f2acdSMatthew Dillon 
287*6d0742aeSMatthew Dillon 	indefinite_init(&info, spin, ident, 0, 's');
2889abb66c5SMatthew Dillon 	cpu_pause();
2899abb66c5SMatthew Dillon 
2900846e4ceSMatthew Dillon #ifdef DEBUG_LOCKS_LATENCY
2910846e4ceSMatthew Dillon 	long j;
2920846e4ceSMatthew Dillon 	for (j = spinlocks_add_latency; j > 0; --j)
2930846e4ceSMatthew Dillon 		cpu_ccfence();
2940846e4ceSMatthew Dillon #endif
29543e72e79SMatthew Dillon 
29643e72e79SMatthew Dillon 	for (;;) {
29743e72e79SMatthew Dillon 		/*
29879a7c522SMatthew Dillon 		 * Loop until we can acquire the shared spinlock.  Note that
29979a7c522SMatthew Dillon 		 * the low bits can be zero while the high EXCLWAIT bits are
30079a7c522SMatthew Dillon 		 * non-zero.  In this situation exclusive requesters have
30179a7c522SMatthew Dillon 		 * priority (otherwise shared users on multiple cpus can hog
30279a7c522SMatthew Dillon 		 * the spinlock).
30379a7c522SMatthew Dillon 		 *
304d033fb32SMatthew Dillon 		 * NOTE: Reading spin->lock prior to the swap is extremely
30543e72e79SMatthew Dillon 		 *	 important on multi-chip/many-core boxes.  On 48-core
30643e72e79SMatthew Dillon 		 *	 this one change improves fully concurrent all-cores
30743e72e79SMatthew Dillon 		 *	 compiles by 100% or better.
30843e72e79SMatthew Dillon 		 *
3090846e4ceSMatthew Dillon 		 *	 I can't emphasize enough how important the pre-read
3100846e4ceSMatthew Dillon 		 *	 is in preventing hw cache bus armageddon on
3110846e4ceSMatthew Dillon 		 *	 multi-chip systems.  And on single-chip/multi-core
3120846e4ceSMatthew Dillon 		 *	 systems it just doesn't hurt.
31343e72e79SMatthew Dillon 		 */
3140846e4ceSMatthew Dillon 		cpu_ccfence();
315cc705b82SMatthew Dillon 
316cc705b82SMatthew Dillon 		/*
317cc705b82SMatthew Dillon 		 * Ignore the EXCLWAIT bits if we are inside our window.
318aab1a048SMatthew Dillon 		 *
319aab1a048SMatthew Dillon 		 * We must always use a windowing approach here or the
320aab1a048SMatthew Dillon 		 * EXCLWAIT bits can prevent the shared lock from ever
321aab1a048SMatthew Dillon 		 * resolving... permanent starvation.
322aab1a048SMatthew Dillon 		 *
323aab1a048SMatthew Dillon 		 * In addition, if we were to always ignore the EXCLWAIT
324aab1a048SMatthew Dillon 		 * bits overlapping shared locks can prevent an exclusive
325aab1a048SMatthew Dillon 		 * lock from ever resolving... permanent starvation again.
326cc705b82SMatthew Dillon 		 */
327aab1a048SMatthew Dillon 		if (/*indefinite_uses_rdtsc &&*/
3284badc135SMatthew Dillon 		    (ovalue & (SPINLOCK_EXCLWAIT - 1)) == 0 &&
329cc705b82SMatthew Dillon 		    (rdtsc() >> spin_window_shift) % ncpus == mycpuid)  {
330d033fb32SMatthew Dillon 			if (atomic_fcmpset_int(&spin->lock, &ovalue,
331cc705b82SMatthew Dillon 					       ovalue | SPINLOCK_SHARED | 1)) {
33243e72e79SMatthew Dillon 				break;
333ae4025a1SMatthew Dillon 			}
334ae4025a1SMatthew Dillon 			continue;
335ae4025a1SMatthew Dillon 		}
33697cfa330SMatthew Dillon 
337ae4025a1SMatthew Dillon 		/*
338cc705b82SMatthew Dillon 		 * Check ovalue tightly (no exponential backoff for shared
339cc705b82SMatthew Dillon 		 * locks; that would result in horrible performance).  Instead,
340cc705b82SMatthew Dillon 		 * shared locks depend on the exclusive priority mechanism
341cc705b82SMatthew Dillon 		 * to avoid starving exclusive locks.
342ae4025a1SMatthew Dillon 		 */
343cc705b82SMatthew Dillon 		if (ovalue == 0) {
344d033fb32SMatthew Dillon 			if (atomic_fcmpset_int(&spin->lock, &ovalue,
345cc705b82SMatthew Dillon 					      SPINLOCK_SHARED | 1)) {
34697cfa330SMatthew Dillon 				break;
34797cfa330SMatthew Dillon 			}
34897cfa330SMatthew Dillon 			continue;
34997cfa330SMatthew Dillon 		}
35097cfa330SMatthew Dillon 
35197cfa330SMatthew Dillon 		/*
35297cfa330SMatthew Dillon 		 * If SHARED is already set, go for the increment, improving
35397cfa330SMatthew Dillon 		 * the exclusive to multiple-readers transition.
35497cfa330SMatthew Dillon 		 */
35597cfa330SMatthew Dillon 		if (ovalue & SPINLOCK_SHARED) {
356d033fb32SMatthew Dillon 			ovalue = atomic_fetchadd_int(&spin->lock, 1);
357ae4025a1SMatthew Dillon 			/* ovalue += 1; NOT NEEDED */
358ae4025a1SMatthew Dillon 			if (ovalue & SPINLOCK_SHARED)
3590846e4ceSMatthew Dillon 				break;
360d033fb32SMatthew Dillon 			ovalue = atomic_fetchadd_int(&spin->lock, -1);
361ae4025a1SMatthew Dillon 			ovalue += -1;
362ae4025a1SMatthew Dillon 			continue;
3630846e4ceSMatthew Dillon 		}
364b1793cc6SMatthew Dillon 		if (indefinite_check(&info))
3658f165b8cSMatthew Dillon 			break;
366ae4025a1SMatthew Dillon 		/*
367ae4025a1SMatthew Dillon 		 * ovalue was wrong anyway, just reload
368ae4025a1SMatthew Dillon 		 */
369d033fb32SMatthew Dillon 		ovalue = spin->lock;
370b12defdcSMatthew Dillon 	}
371b1793cc6SMatthew Dillon 	indefinite_done(&info);
372b02926deSMatthew Dillon }
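
/*
 * Illustrative sketch, not compiled: the TSC-windowing test used by the
 * shared path above.  Each cpu effectively owns a rotating window of about
 * 2^spin_window_shift TSC cycles; a shared waiter bypasses the EXCLWAIT
 * priority bits only inside its own window, which bounds how many shared
 * waiters can jump ahead of exclusive waiters at any instant.
 */
#if 0
static __inline int
example_in_tsc_window(void)
{
	/* true for roughly 1/ncpus of the time, rotating per cpu */
	return ((rdtsc() >> spin_window_shift) % ncpus == mycpuid);
}
#endif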
373b02926deSMatthew Dillon 
374b02926deSMatthew Dillon /*
3754badc135SMatthew Dillon  * Automatically avoid use of rdtsc when running in a VM
3764badc135SMatthew Dillon  */
3774badc135SMatthew Dillon static void
3784badc135SMatthew Dillon spinlock_sysinit(void *dummy __unused)
3794badc135SMatthew Dillon {
3804badc135SMatthew Dillon 	if (vmm_guest)
3814badc135SMatthew Dillon 		indefinite_uses_rdtsc = 0;
3824badc135SMatthew Dillon }
3834badc135SMatthew Dillon SYSINIT(spinsysinit, SI_BOOT2_PROC0, SI_ORDER_FIRST, spinlock_sysinit, NULL);
3844badc135SMatthew Dillon 
3854badc135SMatthew Dillon 
3864badc135SMatthew Dillon /*
387d666840aSMatthew Dillon  * If INVARIANTS is enabled various spinlock timing tests can be run
388d666840aSMatthew Dillon  * by setting debug.spin_lock_test:
389d666840aSMatthew Dillon  *
390d666840aSMatthew Dillon  *	1	Test the indefinite wait code
391d666840aSMatthew Dillon  *	2	Time the best-case exclusive lock overhead (spin_test_count)
392d666840aSMatthew Dillon  *	3	Time the best-case shared lock overhead (spin_test_count)
393b02926deSMatthew Dillon  */
394b02926deSMatthew Dillon 
395b02926deSMatthew Dillon #ifdef INVARIANTS
396b02926deSMatthew Dillon 
397d666840aSMatthew Dillon static int spin_test_count = 10000000;
3980c52fa62SSamuel J. Greear SYSCTL_INT(_debug, OID_AUTO, spin_test_count, CTLFLAG_RW, &spin_test_count, 0,
3990c52fa62SSamuel J. Greear     "Number of iterations to use for spinlock wait code test");
400d666840aSMatthew Dillon 
401b02926deSMatthew Dillon static int
402b02926deSMatthew Dillon sysctl_spin_lock_test(SYSCTL_HANDLER_ARGS)
403b02926deSMatthew Dillon {
404b12defdcSMatthew Dillon         struct spinlock spin;
405b02926deSMatthew Dillon 	int error;
406b02926deSMatthew Dillon 	int value = 0;
407d666840aSMatthew Dillon 	int i;
408b02926deSMatthew Dillon 
4092b3f93eaSMatthew Dillon 	if ((error = caps_priv_check_self(SYSCAP_RESTRICTEDROOT)) != 0)
410b02926deSMatthew Dillon 		return (error);
411b02926deSMatthew Dillon 	if ((error = SYSCTL_IN(req, &value, sizeof(value))) != 0)
412b02926deSMatthew Dillon 		return (error);
413b02926deSMatthew Dillon 
414d666840aSMatthew Dillon 	/*
415d666840aSMatthew Dillon 	 * Indefinite wait test
416d666840aSMatthew Dillon 	 */
417b02926deSMatthew Dillon 	if (value == 1) {
418ba87a4abSSascha Wildner 		spin_init(&spin, "sysctllock");
419b12defdcSMatthew Dillon 		spin_lock(&spin);	/* force an indefinite wait */
420b02926deSMatthew Dillon 		spin_lock_test_mode = 1;
421b12defdcSMatthew Dillon 		spin_lock(&spin);
422b12defdcSMatthew Dillon 		spin_unlock(&spin);	/* Clean up the spinlock count */
423b12defdcSMatthew Dillon 		spin_unlock(&spin);
424b02926deSMatthew Dillon 		spin_lock_test_mode = 0;
425b02926deSMatthew Dillon 	}
426d666840aSMatthew Dillon 
427d666840aSMatthew Dillon 	/*
428d666840aSMatthew Dillon 	 * Time best-case exclusive spinlocks
429d666840aSMatthew Dillon 	 */
430d666840aSMatthew Dillon 	if (value == 2) {
431d666840aSMatthew Dillon 		globaldata_t gd = mycpu;
432d666840aSMatthew Dillon 
433ba87a4abSSascha Wildner 		spin_init(&spin, "sysctllocktest");
434d666840aSMatthew Dillon 		for (i = spin_test_count; i > 0; --i) {
435050032ecSMatthew Dillon 		    _spin_lock_quick(gd, &spin, "test");
436b12defdcSMatthew Dillon 		    spin_unlock_quick(gd, &spin);
437d666840aSMatthew Dillon 		}
438d666840aSMatthew Dillon 	}
439d666840aSMatthew Dillon 
440b02926deSMatthew Dillon         return (0);
441b02926deSMatthew Dillon }
442b02926deSMatthew Dillon 
443b02926deSMatthew Dillon SYSCTL_PROC(_debug, KERN_PROC_ALL, spin_lock_test, CTLFLAG_RW|CTLTYPE_INT,
444b02926deSMatthew Dillon         0, 0, sysctl_spin_lock_test, "I", "Test spinlock wait code");
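
/*
 * Example usage of the test hooks above on an INVARIANTS kernel, from a
 * root shell (test 1 deliberately re-acquires a held spinlock to exercise
 * the indefinite wait code, so only use it on a disposable test machine):
 *
 *	sysctl debug.spin_test_count=10000000
 *	sysctl debug.spin_lock_test=2	# time best-case exclusive lock/unlock
 */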
445b02926deSMatthew Dillon 
446d666840aSMatthew Dillon #endif	/* INVARIANTS */
447