/*
 * Copyright (c) 2005 Jeffrey M. Hsu.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Jeffrey M. Hsu. and Matthew Dillon
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/kern_spinlock.c,v 1.16 2008/09/11 01:11:42 y0netan1 Exp $
 */

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#ifdef INVARIANTS
#include <sys/proc.h>
#endif
#include <ddb/ddb.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/specialreg.h>
#include <machine/clock.h>
#include <sys/spinlock.h>
#include <sys/spinlock2.h>
#include <sys/ktr.h>

#define	BACKOFF_INITIAL	1
#define	BACKOFF_LIMIT	256
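/*
 * BACKOFF_INITIAL is the starting cpu_pause() count for the exponential
 * backoff loops below; BACKOFF_LIMIT caps how far the count may grow and
 * can be tuned at run time via the debug.spinlocks_bolim sysctl
 * (spinlocks_backoff_limit).
 */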

#ifdef SMP

/*
 * Kernel Trace
 */
#if !defined(KTR_SPIN_CONTENTION)
#define KTR_SPIN_CONTENTION	KTR_ALL
#endif
#define SPIN_STRING	"spin=%p type=%c"
#define SPIN_ARG_SIZE	(sizeof(void *) + sizeof(int))

KTR_INFO_MASTER(spin);
KTR_INFO(KTR_SPIN_CONTENTION, spin, beg, 0, SPIN_STRING, SPIN_ARG_SIZE);
KTR_INFO(KTR_SPIN_CONTENTION, spin, end, 1, SPIN_STRING, SPIN_ARG_SIZE);
KTR_INFO(KTR_SPIN_CONTENTION, spin, backoff, 2,
	 "spin=%p bo1=%d thr=%p bo=%d",
	 ((2 * sizeof(void *)) + (2 * sizeof(int))));
KTR_INFO(KTR_SPIN_CONTENTION, spin, bofail, 3, SPIN_STRING, SPIN_ARG_SIZE);

#define logspin(name, mtx, type)			\
	KTR_LOG(spin_ ## name, mtx, type)

#define logspin_backoff(mtx, bo1, thr, bo)		\
	KTR_LOG(spin_backoff, mtx, bo1, thr, bo)
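
/*
 * logspin(beg/end/bofail, mtx, type) expands to KTR_LOG(spin_beg, ...),
 * etc., and is used by the contested paths below to record the spinlock
 * address and lock type ('w', 'r', or 'u') in the kernel trace buffer
 * when KTR_SPIN_CONTENTION tracing is enabled.
 */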

#ifdef INVARIANTS
static int spin_lock_test_mode;
#endif

static int64_t spinlocks_contested1;
SYSCTL_QUAD(_debug, OID_AUTO, spinlocks_contested1, CTLFLAG_RD,
	    &spinlocks_contested1, 0, "");

static int64_t spinlocks_contested2;
SYSCTL_QUAD(_debug, OID_AUTO, spinlocks_contested2, CTLFLAG_RD,
	    &spinlocks_contested2, 0, "");

static int spinlocks_backoff_limit = BACKOFF_LIMIT;
SYSCTL_INT(_debug, OID_AUTO, spinlocks_bolim, CTLFLAG_RW,
	   &spinlocks_backoff_limit, 0, "");

struct exponential_backoff {
	int backoff;
	int nsec;
	struct spinlock *mtx;
	sysclock_t base;
};
static int exponential_backoff(struct exponential_backoff *bo);

static __inline
void
exponential_init(struct exponential_backoff *bo, struct spinlock *mtx)
{
	bo->backoff = BACKOFF_INITIAL;
	bo->nsec = 0;
	bo->mtx = mtx;
}
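
/*
 * The contested paths below all follow roughly the same pattern:
 *
 *	struct exponential_backoff bo;
 *
 *	exponential_init(&bo, mtx);
 *	while (the lock still cannot be acquired) {
 *		if (exponential_backoff(&bo))
 *			break;		(give up and let the caller proceed)
 *	}
 *
 * exponential_backoff() spins for an exponentially growing number of
 * cpu_pause()s and handles the indefinite-wait diagnostics.
 */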

/*
 * We were either contested due to another exclusive lock holder,
 * or due to the presence of shared locks.  We have to undo the mess
 * we created by returning the shared locks.
 *
 * If there was another exclusive lock holder, the exclusive bit
 * will be the only bit set in value and we don't have to do anything,
 * since restoration does not involve any work.
 *
 * Otherwise we successfully obtained the exclusive bit.  Attempt to
 * clear the shared bits.  If we are able to clear the shared bits
 * we win.  Otherwise we lose and we have to restore the shared bits
 * we couldn't clear (and also clear our exclusive bit).
 */
int
spin_trylock_wr_contested(globaldata_t gd, struct spinlock *mtx, int value)
{
	int bit;

	++spinlocks_contested1;
	if ((value & SPINLOCK_EXCLUSIVE) == 0) {
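		/*
		 * We hold the exclusive bit.  Each remaining bit in value
		 * is the cached shared-lock hint for one cpu; if any such
		 * cpu actually holds this spinlock shared we must restore
		 * the shared bits (which also clears our exclusive bit)
		 * and fail.
		 */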
		while (value) {
			bit = bsfl(value);
			if (globaldata_find(bit)->gd_spinlock_rd == mtx) {
				atomic_swap_int(&mtx->lock, value);
				--gd->gd_spinlocks_wr;
				return (FALSE);
			}
			value &= ~(1 << bit);
		}
		return (TRUE);
	}
	--gd->gd_spinlocks_wr;
	return (FALSE);
}

/*
 * We were either contested due to another exclusive lock holder,
 * or due to the presence of shared locks.
 *
 * NOTE: If value indicates an exclusively held mutex, no shared bits
 * would have been set and we can throw away value.
 */
void
spin_lock_wr_contested(struct spinlock *mtx, int value)
{
	struct exponential_backoff backoff;
	globaldata_t gd = mycpu;
	int bit;
	int mask;

	/*
	 * Wait until we can gain exclusive access vs another exclusive
	 * holder.
	 */
	exponential_init(&backoff, mtx);
	++spinlocks_contested1;
	logspin(beg, mtx, 'w');

	while (value & SPINLOCK_EXCLUSIVE) {
		value = atomic_swap_int(&mtx->lock, SPINLOCK_EXCLUSIVE);
		if (exponential_backoff(&backoff)) {
			value &= ~SPINLOCK_EXCLUSIVE;
			break;
		}
	}

	/*
	 * Kill the cached shared bit for our own cpu.  This is the most
	 * common case and there's no sense wasting cpu on it.  Since
	 * spinlocks aren't recursive, we can't own a shared ref on the
	 * spinlock while trying to get an exclusive one.
	 *
	 * If multiple bits are set, do not stall on any single cpu.  Check
	 * all cpus that have the cache bit set, then loop and check again,
	 * until we've cleaned all the bits.
	 */
	value &= ~gd->gd_cpumask;

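	/*
	 * value now holds the cached shared bits of the other cpus.  A cpu
	 * whose gd_spinlock_rd no longer points at this spinlock merely
	 * left a stale cache bit behind and can be discarded; a cpu that
	 * still holds the lock shared forces us to back off and rescan.
	 */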
	while ((mask = value) != 0) {
		while (mask) {
			bit = bsfl(value);
			if (globaldata_find(bit)->gd_spinlock_rd != mtx) {
				value &= ~(1 << bit);
			} else if (exponential_backoff(&backoff)) {
				value = 0;
				break;
			}
			mask &= ~(1 << bit);
		}
	}
	logspin(end, mtx, 'w');
}

/*
 * The cache bit wasn't set for our cpu.  Loop until we can set the bit.
 * As with the spin_lock_rd() inline, we need a memory fence after setting
 * gd_spinlock_rd to interlock against exclusive spinlocks waiting for
 * that field to clear.
 */
void
spin_lock_rd_contested(struct spinlock *mtx)
{
	struct exponential_backoff backoff;
	globaldata_t gd = mycpu;
	int value = mtx->lock;

	/*
	 * Shortcut the op if we can just set the cache bit.  This case
	 * occurs when the last lock was an exclusive lock.
	 */
	while ((value & SPINLOCK_EXCLUSIVE) == 0) {
		if (atomic_cmpset_int(&mtx->lock, value, value|gd->gd_cpumask))
			return;
		value = mtx->lock;
	}

	exponential_init(&backoff, mtx);
	++spinlocks_contested1;

	logspin(beg, mtx, 'r');

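	/*
	 * Slow path: whenever the exclusive bit is set we clear our
	 * gd_spinlock_rd hint while backing off, so a contending
	 * exclusive locker does not spin forever on this cpu, then
	 * re-set it and issue a memory fence before re-checking.
	 * Otherwise we simply retry setting our cache bit.
	 */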
	while ((value & gd->gd_cpumask) == 0) {
		if (value & SPINLOCK_EXCLUSIVE) {
			gd->gd_spinlock_rd = NULL;
			if (exponential_backoff(&backoff)) {
				gd->gd_spinlock_rd = mtx;
				break;
			}
			gd->gd_spinlock_rd = mtx;
			cpu_mfence();
		} else {
			if (atomic_cmpset_int(&mtx->lock, value, value|gd->gd_cpumask))
				break;
		}
		value = mtx->lock;
	}
	logspin(end, mtx, 'r');
}

/*
 * Handle exponential backoff and indefinite waits.
 *
 * If the system is handling a panic, we hand the spinlock over to the
 * caller after 1 second.  After 10 seconds we attempt to print a debugger
 * backtrace.  We also run pending interrupts in order to allow a console
 * break into DDB.
 */
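/*
 * Returns TRUE when the caller should stop spinning and assume ownership
 * of the lock (panic hand-over, or the INVARIANTS test mode below), FALSE
 * when the caller should keep retrying.
 */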
static
int
exponential_backoff(struct exponential_backoff *bo)
{
	sysclock_t count;
	int backoff;

#ifdef _RDTSC_SUPPORTED_
	if (cpu_feature & CPUID_TSC) {
		backoff =
		(((u_long)rdtsc() ^ (((u_long)curthread) >> 5)) &
		 (bo->backoff - 1)) + BACKOFF_INITIAL;
	} else
#endif
		backoff = bo->backoff;
	logspin_backoff(bo->mtx, bo->backoff, curthread, backoff);

	/*
	 * Quick backoff
	 */
	for (; backoff; --backoff)
		cpu_pause();
	if (bo->backoff < spinlocks_backoff_limit) {
		bo->backoff <<= 1;
		return (FALSE);
	} else {
		bo->backoff = BACKOFF_INITIAL;
	}

	logspin(bofail, bo->mtx, 'u');

	/*
	 * Indefinite
	 */
	++spinlocks_contested2;
	cpu_spinlock_contested();
	if (bo->nsec == 0) {
		bo->base = sys_cputimer->count();
		bo->nsec = 1;
	}

	count = sys_cputimer->count();
	if (count - bo->base > sys_cputimer->freq) {
		kprintf("spin_lock: %p, indefinite wait!\n", bo->mtx);
		if (panicstr)
			return (TRUE);
#if defined(INVARIANTS) && defined(DDB)
		if (spin_lock_test_mode) {
			db_print_backtrace();
			return (TRUE);
		}
#endif
		++bo->nsec;
#if defined(INVARIANTS) && defined(DDB)
		if (bo->nsec == 11)
			db_print_backtrace();
#endif
		if (bo->nsec == 60)
			panic("spin_lock: %p, indefinite wait!\n", bo->mtx);
		splz();
		bo->base = count;
	}
	return (FALSE);
}

/*
 * If INVARIANTS is enabled, various spinlock timing tests can be run
 * by setting debug.spin_lock_test:
 *
 *	1	Test the indefinite wait code
 *	2	Time the best-case exclusive lock overhead (spin_test_count)
 *	3	Time the best-case shared lock overhead (spin_test_count)
 */
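
/*
 * For example, on an INVARIANTS kernel the iteration count can be tuned
 * and the best-case exclusive lock timing test run with:
 *
 *	sysctl debug.spin_test_count=1000000
 *	sysctl debug.spin_lock_test=2
 */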

#ifdef INVARIANTS

static int spin_test_count = 10000000;
SYSCTL_INT(_debug, OID_AUTO, spin_test_count, CTLFLAG_RW,
	   &spin_test_count, 0, "");

static int
sysctl_spin_lock_test(SYSCTL_HANDLER_ARGS)
{
	struct spinlock mtx;
	int error;
	int value = 0;
	int i;

	if ((error = suser(curthread)) != 0)
		return (error);
	if ((error = SYSCTL_IN(req, &value, sizeof(value))) != 0)
		return (error);

	/*
	 * Indefinite wait test
	 */
	if (value == 1) {
		spin_init(&mtx);
		spin_lock_wr(&mtx);	/* force an indefinite wait */
		spin_lock_test_mode = 1;
		spin_lock_wr(&mtx);
		spin_unlock_wr(&mtx);	/* Clean up the spinlock count */
		spin_unlock_wr(&mtx);
		spin_lock_test_mode = 0;
	}

	/*
	 * Time best-case exclusive spinlocks
	 */
	if (value == 2) {
		globaldata_t gd = mycpu;

		spin_init(&mtx);
		for (i = spin_test_count; i > 0; --i) {
			spin_lock_wr_quick(gd, &mtx);
			spin_unlock_wr_quick(gd, &mtx);
		}
	}

	/*
	 * Time best-case shared spinlocks
	 */
	if (value == 3) {
		globaldata_t gd = mycpu;

		spin_init(&mtx);
		for (i = spin_test_count; i > 0; --i) {
			spin_lock_rd_quick(gd, &mtx);
			spin_unlock_rd_quick(gd, &mtx);
		}
	}
	return (0);
}

SYSCTL_PROC(_debug, KERN_PROC_ALL, spin_lock_test, CTLFLAG_RW|CTLTYPE_INT,
	0, 0, sysctl_spin_lock_test, "I", "Test spinlock wait code");

#endif	/* INVARIANTS */
#endif	/* SMP */