xref: /dragonfly/sys/kern/kern_spinlock.c (revision 7bc7e232)
/*
 * Copyright (c) 2005 Jeffrey M. Hsu.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Jeffrey M. Hsu. and Matthew Dillon
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/kern_spinlock.c,v 1.11 2007/07/02 16:51:58 dillon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#ifdef INVARIANTS
#include <sys/proc.h>
#endif
#include <ddb/ddb.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/clock.h>
#include <sys/spinlock.h>
#include <sys/spinlock2.h>
#include <sys/ktr.h>

#define	BACKOFF_INITIAL	1
#define	BACKOFF_LIMIT	256

#ifdef SMP

/*
 * Kernel Trace
 */
#if !defined(KTR_SPIN_CONTENTION)
#define KTR_SPIN_CONTENTION	KTR_ALL
#endif
#define SPIN_STRING	"spin=%p type=%c"
#define SPIN_ARG_SIZE	(sizeof(void *) + sizeof(int))

KTR_INFO_MASTER(spin);
KTR_INFO(KTR_SPIN_CONTENTION, spin, beg, 0, SPIN_STRING, SPIN_ARG_SIZE);
KTR_INFO(KTR_SPIN_CONTENTION, spin, end, 1, SPIN_STRING, SPIN_ARG_SIZE);

#define logspin(name, mtx, type)			\
	KTR_LOG(spin_ ## name, mtx, type)
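
/*
 * The contested paths below bracket their spin with logspin(beg, ...)
 * and logspin(end, ...), passing 'w' for exclusive and 'r' for shared
 * attempts, so the KTR trace buffers record how long each contention
 * episode lasted.  KTR_SPIN_CONTENTION defaults to KTR_ALL unless it is
 * defined elsewhere (e.g. in the kernel configuration).
 */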

#ifdef INVARIANTS
static int spin_lock_test_mode;
#endif

static int64_t spinlocks_contested1;
SYSCTL_QUAD(_debug, OID_AUTO, spinlocks_contested1, CTLFLAG_RD, &spinlocks_contested1, 0, "");
static int64_t spinlocks_contested2;
SYSCTL_QUAD(_debug, OID_AUTO, spinlocks_contested2, CTLFLAG_RD, &spinlocks_contested2, 0, "");

struct exponential_backoff {
	int backoff;
	int nsec;
	struct spinlock *mtx;
	sysclock_t base;
};
static int exponential_backoff(struct exponential_backoff *bo);

static __inline
void
exponential_init(struct exponential_backoff *bo, struct spinlock *mtx)
{
	bo->backoff = BACKOFF_INITIAL;
	bo->nsec = 0;
	bo->mtx = mtx;
}
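
/*
 * The progression implied by exponential_init() and exponential_backoff()
 * below: the spin count starts at BACKOFF_INITIAL (1) and doubles on each
 * contested pass (1, 2, 4, ... 256).  Once BACKOFF_LIMIT is reached the
 * backoff switches to indefinite-wait handling, which starts reading
 * sys_cputimer and counting elapsed seconds in bo->nsec.
 */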

/*
 * We were either contested due to another exclusive lock holder,
 * or due to the presence of shared locks.  We have to undo the mess
 * we created by returning the shared locks.
 *
 * If there was another exclusive lock holder, the exclusive bit will be
 * the only bit set in value and we don't have to do anything, since
 * restoring that state involves no work.
 *
 * Otherwise we successfully obtained the exclusive bit.  Attempt to
 * clear the shared bits.  If we are able to clear the shared bits
 * we win.  Otherwise we lose and we have to restore the shared bits
 * we couldn't clear (and also clear our exclusive bit).
 */
int
spin_trylock_wr_contested(struct spinlock *mtx, int value)
{
	int bit;

	++spinlocks_contested1;
	if ((value & SPINLOCK_EXCLUSIVE) == 0) {
		while (value) {
			bit = bsfl(value);
			if (globaldata_find(bit)->gd_spinlock_rd == mtx) {
				atomic_swap_int(&mtx->lock, value);
				return (FALSE);
			}
			value &= ~(1 << bit);
		}
		return (TRUE);
	}
	return (FALSE);
}
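
/*
 * Example of the paths above: suppose value came back holding the cached
 * shared bits for cpu 0 and cpu 2 and no SPINLOCK_EXCLUSIVE.  If cpu 0's
 * gd_spinlock_rd no longer points at this spinlock its bit is stale and
 * is simply dropped from value.  If cpu 2 is still inside a read lock
 * (gd_spinlock_rd == mtx), the atomic_swap_int() writes the remaining
 * shared bits back over the exclusive bit installed by the inline caller
 * and the trylock fails.  Only when every bit turns out to be stale do we
 * keep the exclusive bit and succeed.
 */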

/*
 * We were either contested due to another exclusive lock holder,
 * or due to the presence of shared locks.
 *
 * NOTE: If value indicates an exclusively held mutex, no shared bits
 * would have been set and we can throw away value.
 */
void
spin_lock_wr_contested(struct spinlock *mtx, int value)
{
	struct exponential_backoff backoff;
	globaldata_t gd = mycpu;
	int bit;
	int mask;

	/*
	 * Wait until we can gain exclusive access vs another exclusive
	 * holder.
	 */
	exponential_init(&backoff, mtx);
	++spinlocks_contested1;
	logspin(beg, mtx, 'w');

	while (value & SPINLOCK_EXCLUSIVE) {
		value = atomic_swap_int(&mtx->lock, SPINLOCK_EXCLUSIVE);
		if (exponential_backoff(&backoff)) {
			value &= ~SPINLOCK_EXCLUSIVE;
			break;
		}
	}

	/*
	 * Kill the cached shared bit for our own cpu.  This is the most
	 * common case and there's no sense wasting cpu on it.  Since
	 * spinlocks aren't recursive, we can't own a shared ref on the
	 * spinlock while trying to get an exclusive one.
	 *
	 * If multiple bits are set do not stall on any single cpu.  Check
	 * all cpus that have the cache bit set, then loop and check again,
	 * until we've cleaned all the bits.
	 */
	value &= ~gd->gd_cpumask;

	while ((mask = value) != 0) {
		while (mask) {
			bit = bsfl(mask);
			if (globaldata_find(bit)->gd_spinlock_rd != mtx) {
				value &= ~(1 << bit);
			} else if (exponential_backoff(&backoff)) {
				value = 0;
				break;
			}
			mask &= ~(1 << bit);
		}
	}
	logspin(end, mtx, 'w');
}
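
/*
 * Worked example of the scan above: say value ends up with the cached
 * shared bits for cpus 1, 2 and 3.  The inner loop walks the bits in
 * mask; cpu 1's bit is stale (gd_spinlock_rd != mtx) and is cleared from
 * value, while cpus 2 and 3 are still holding read locks, so each gets a
 * single exponential_backoff() step per pass instead of the scan spinning
 * on cpu 2 alone.  The outer loop then re-primes mask from the remaining
 * value and checks again, until every reader has released (its bit goes
 * stale and is cleared) or exponential_backoff() returns TRUE and we stop
 * waiting.
 */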

/*
 * The cache bit wasn't set for our cpu.  Loop until we can set the bit.
 * As with the spin_lock_rd() inline we need a memory fence after setting
 * gd_spinlock_rd to interlock against exclusive spinlocks waiting for
 * that field to clear.
 */
void
spin_lock_rd_contested(struct spinlock *mtx)
{
	struct exponential_backoff backoff;
	globaldata_t gd = mycpu;
	int value = mtx->lock;

	/*
	 * Shortcut the op if we can just set the cache bit.  This case
	 * occurs when the last lock was an exclusive lock.
	 */
	while ((value & SPINLOCK_EXCLUSIVE) == 0) {
		if (atomic_cmpset_int(&mtx->lock, value, value|gd->gd_cpumask))
			return;
		value = mtx->lock;
	}

	exponential_init(&backoff, mtx);
	++spinlocks_contested1;

	logspin(beg, mtx, 'r');

	while ((value & gd->gd_cpumask) == 0) {
		if (value & SPINLOCK_EXCLUSIVE) {
			gd->gd_spinlock_rd = NULL;
			if (exponential_backoff(&backoff)) {
				gd->gd_spinlock_rd = mtx;
				break;
			}
			gd->gd_spinlock_rd = mtx;
			cpu_mfence();
		} else {
			if (atomic_cmpset_int(&mtx->lock, value, value|gd->gd_cpumask))
				break;
		}
		value = mtx->lock;
	}
	logspin(end, mtx, 'r');
}
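
/*
 * Note on the interlock above: the reader republishes gd_spinlock_rd =
 * mtx and issues cpu_mfence() before re-reading mtx->lock, while the
 * exclusive paths swap SPINLOCK_EXCLUSIVE into the lock word and then
 * scan each cpu's gd_spinlock_rd.  Per the comment preceding
 * spin_lock_rd_contested(), the fence is intended to keep both sides
 * from acting on stale state at the same time: either the reader sees
 * the exclusive bit and backs off, or the exclusive holder sees
 * gd_spinlock_rd pointing at this spinlock and waits for it to clear.
 */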

/*
 * Handle exponential backoff and indefinite waits.
 *
 * If the system is handling a panic we hand the spinlock over to the caller
 * after 1 second.  After 10 seconds we attempt to print a debugger
 * backtrace.  We also run pending interrupts in order to allow a console
 * break into DDB.
 */
static
int
exponential_backoff(struct exponential_backoff *bo)
{
	sysclock_t count;
	int i;

	/*
	 * Quick backoff
	 */
	for (i = 0; i < bo->backoff; ++i)
		cpu_nop();
	if (bo->backoff < BACKOFF_LIMIT) {
		bo->backoff <<= 1;
		return (FALSE);
	}

	/*
	 * Indefinite
	 */
	++spinlocks_contested2;
	cpu_spinlock_contested();
	if (bo->nsec == 0) {
		bo->base = sys_cputimer->count();
		bo->nsec = 1;
	}

	count = sys_cputimer->count();
	if (count - bo->base > sys_cputimer->freq) {
		kprintf("spin_lock: %p, indefinite wait!\n", bo->mtx);
		if (panicstr)
			return (TRUE);
#if defined(INVARIANTS) && defined(DDB)
		if (spin_lock_test_mode) {
			db_print_backtrace();
			return (TRUE);
		}
#endif
		++bo->nsec;
#if defined(INVARIANTS) && defined(DDB)
		if (bo->nsec == 11)
			db_print_backtrace();
#endif
		if (bo->nsec == 60)
			panic("spin_lock: %p, indefinite wait!\n", bo->mtx);
		splz();
		bo->base = count;
	}
	return (FALSE);
}
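
/*
 * Timeline of the code above, for reference: the quick phase doubles the
 * spin from BACKOFF_INITIAL (1) up to BACKOFF_LIMIT (256) cpu_nop()s per
 * call; once the limit is reached every call spins 256 NOPs and then
 * consults sys_cputimer.  In the indefinite phase a warning is printed
 * roughly once per second of spinning; the lock is handed to the caller
 * immediately if a panic is in progress (or in spin_lock_test_mode under
 * INVARIANTS+DDB), a DDB backtrace is printed around the tenth warning,
 * and the system panics after roughly a minute.  splz() runs pending
 * interrupts each time the one-second check fires so a console break
 * into DDB remains possible.
 */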

/*
 * If INVARIANTS is enabled various spinlock timing tests can be run
 * by setting debug.spin_lock_test:
 *
 *	1	Test the indefinite wait code
 *	2	Time the best-case exclusive lock overhead (spin_test_count)
 *	3	Time the best-case shared lock overhead (spin_test_count)
 */

#ifdef INVARIANTS

static int spin_test_count = 10000000;
SYSCTL_INT(_debug, OID_AUTO, spin_test_count, CTLFLAG_RW, &spin_test_count, 0, "");

static int
sysctl_spin_lock_test(SYSCTL_HANDLER_ARGS)
{
	struct spinlock mtx;
	int error;
	int value = 0;
	int i;

	if ((error = suser(curthread)) != 0)
		return (error);
	if ((error = SYSCTL_IN(req, &value, sizeof(value))) != 0)
		return (error);

	/*
	 * Indefinite wait test
	 */
	if (value == 1) {
		spin_init(&mtx);
		spin_lock_wr(&mtx);	/* force an indefinite wait */
		spin_lock_test_mode = 1;
		spin_lock_wr(&mtx);
		spin_unlock_wr(&mtx);	/* Clean up the spinlock count */
		spin_unlock_wr(&mtx);
		spin_lock_test_mode = 0;
	}

	/*
	 * Time best-case exclusive spinlocks
	 */
	if (value == 2) {
		globaldata_t gd = mycpu;

		spin_init(&mtx);
		for (i = spin_test_count; i > 0; --i) {
		    spin_lock_wr_quick(gd, &mtx);
		    spin_unlock_wr_quick(gd, &mtx);
		}
	}

	/*
	 * Time best-case shared spinlocks
	 */
	if (value == 3) {
		globaldata_t gd = mycpu;

		spin_init(&mtx);
		for (i = spin_test_count; i > 0; --i) {
		    spin_lock_rd_quick(gd, &mtx);
		    spin_unlock_rd_quick(gd, &mtx);
		}
	}
	return (0);
}

SYSCTL_PROC(_debug, KERN_PROC_ALL, spin_lock_test, CTLFLAG_RW|CTLTYPE_INT,
        0, 0, sysctl_spin_lock_test, "I", "Test spinlock wait code");
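
/*
 * Example usage from a root shell (the handler rejects non-root via
 * suser()).  The loops above do not report timings themselves, so time
 * the sysctl(8) invocation externally, e.g. with time(1):
 *
 *	sysctl debug.spin_test_count=1000000
 *	time sysctl debug.spin_lock_test=2	# exclusive lock/unlock pairs
 *	time sysctl debug.spin_lock_test=3	# shared lock/unlock pairs
 *
 * Mode 1 intentionally double-acquires a private spinlock to exercise
 * the indefinite-wait and backtrace path in exponential_backoff().
 */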

#endif	/* INVARIANTS */
#endif	/* SMP */