xref: /dragonfly/sys/kern/kern_spinlock.c (revision 6693db17)
/*
 * Copyright (c) 2005 Jeffrey M. Hsu.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Jeffrey M. Hsu. and Matthew Dillon
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/kern_spinlock.c,v 1.16 2008/09/11 01:11:42 y0netan1 Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#ifdef INVARIANTS
#include <sys/proc.h>
#endif
#include <sys/priv.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/specialreg.h>
#include <machine/clock.h>
#include <sys/spinlock.h>
#include <sys/spinlock2.h>
#include <sys/ktr.h>

#define	BACKOFF_INITIAL	1
#define	BACKOFF_LIMIT	256
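/*
 * BACKOFF_INITIAL and BACKOFF_LIMIT bound the spin counts used by the
 * exponential_backoff() helper below.  As used throughout this file, the
 * lock word consists of the SPINLOCK_EXCLUSIVE bit for an exclusive
 * holder plus one cached-shared bit per cpu (gd_cpumask, indexed by cpu
 * id); the authoritative definitions live in <sys/spinlock.h>.
 */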

#ifdef SMP

/*
 * Kernel Trace
 */
#if !defined(KTR_SPIN_CONTENTION)
#define KTR_SPIN_CONTENTION	KTR_ALL
#endif
#define SPIN_STRING	"spin=%p type=%c"
#define SPIN_ARG_SIZE	(sizeof(void *) + sizeof(int))

KTR_INFO_MASTER(spin);
KTR_INFO(KTR_SPIN_CONTENTION, spin, beg, 0, SPIN_STRING, SPIN_ARG_SIZE);
KTR_INFO(KTR_SPIN_CONTENTION, spin, end, 1, SPIN_STRING, SPIN_ARG_SIZE);
KTR_INFO(KTR_SPIN_CONTENTION, spin, backoff, 2,
	 "spin=%p bo1=%d thr=%p bo=%d",
	 ((2 * sizeof(void *)) + (2 * sizeof(int))));
KTR_INFO(KTR_SPIN_CONTENTION, spin, bofail, 3, SPIN_STRING, SPIN_ARG_SIZE);

#define logspin(name, mtx, type)			\
	KTR_LOG(spin_ ## name, mtx, type)

#define logspin_backoff(mtx, bo1, thr, bo)		\
	KTR_LOG(spin_backoff, mtx, bo1, thr, bo)

#ifdef INVARIANTS
static int spin_lock_test_mode;
#endif

static int64_t spinlocks_contested1;
SYSCTL_QUAD(_debug, OID_AUTO, spinlocks_contested1, CTLFLAG_RD,
	    &spinlocks_contested1, 0, "");

static int64_t spinlocks_contested2;
SYSCTL_QUAD(_debug, OID_AUTO, spinlocks_contested2, CTLFLAG_RD,
	    &spinlocks_contested2, 0, "");

static int spinlocks_backoff_limit = BACKOFF_LIMIT;
SYSCTL_INT(_debug, OID_AUTO, spinlocks_bolim, CTLFLAG_RW,
	   &spinlocks_backoff_limit, 0, "");

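/*
 * Backoff state carried across calls to exponential_backoff(): the
 * current spin count, a coarse count of elapsed seconds (nsec), the
 * spinlock being waited on, and the sys_cputimer base count used to
 * detect an indefinite wait.
 */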
struct exponential_backoff {
	int backoff;
	int nsec;
	struct spinlock *mtx;
	sysclock_t base;
};
static int exponential_backoff(struct exponential_backoff *bo);

static __inline
void
exponential_init(struct exponential_backoff *bo, struct spinlock *mtx)
{
	bo->backoff = BACKOFF_INITIAL;
	bo->nsec = 0;
	bo->mtx = mtx;
}

/*
 * We were either contested due to another exclusive lock holder,
 * or due to the presence of shared locks.  We have to undo the mess
 * we created by returning the shared locks.
 *
 * If there was another exclusive lock holder, the exclusive bit in value
 * will be the only bit set.  We don't have to do anything since
 * restoration does not involve any work.
 *
 * Otherwise we successfully obtained the exclusive bit.  Attempt to
 * clear the shared bits.  If we are able to clear the shared bits
 * we win.  Otherwise we lose and we have to restore the shared bits
 * we couldn't clear (and also clear our exclusive bit).
 */
int
spin_trylock_wr_contested(globaldata_t gd, struct spinlock *mtx, int value)
{
	int bit;

	++spinlocks_contested1;
	if ((value & SPINLOCK_EXCLUSIVE) == 0) {
		while (value) {
			bit = bsfl(value);
			if (globaldata_find(bit)->gd_spinlock_rd == mtx) {
				atomic_swap_int(&mtx->lock, value);
				--gd->gd_spinlocks_wr;
				return (FALSE);
			}
			value &= ~(1 << bit);
		}
		return (TRUE);
	}
	--gd->gd_spinlocks_wr;
	return (FALSE);
}
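
/*
 * For reference, the inline fast path that hands off to
 * spin_trylock_wr_contested() lives in <sys/spinlock2.h>.  The sketch
 * below is an approximation reconstructed from how this function
 * behaves; it is not the authoritative inline:
 *
 *	++gd->gd_spinlocks_wr;
 *	if ((value = atomic_swap_int(&mtx->lock, SPINLOCK_EXCLUSIVE)) != 0)
 *		return (spin_trylock_wr_contested(gd, mtx, value));
 *	return (TRUE);
 *
 * The unconditional swap sets the exclusive bit and wipes any cached
 * shared bits, which is the "mess" the function above may have to undo
 * by swapping the old value back in.
 */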

/*
 * We were either contested due to another exclusive lock holder,
 * or due to the presence of shared locks.
 *
 * NOTE: If value indicates an exclusively held mutex, no shared bits
 * would have been set and we can throw away value.
 */
void
spin_lock_wr_contested(struct spinlock *mtx, int value)
{
	struct exponential_backoff backoff;
	globaldata_t gd = mycpu;
	int bit;
	int mask;

	/*
	 * Wait until we can gain exclusive access vs another exclusive
	 * holder.
	 */
	exponential_init(&backoff, mtx);
	++spinlocks_contested1;
	logspin(beg, mtx, 'w');

	while (value & SPINLOCK_EXCLUSIVE) {
		value = atomic_swap_int(&mtx->lock, SPINLOCK_EXCLUSIVE);
		if (exponential_backoff(&backoff)) {
			value &= ~SPINLOCK_EXCLUSIVE;
			break;
		}
	}

	/*
	 * Kill the cached shared bit for our own cpu.  This is the most
	 * common case and there's no sense wasting cpu on it.  Since
	 * spinlocks aren't recursive, we can't own a shared ref on the
	 * spinlock while trying to get an exclusive one.
	 *
	 * If multiple bits are set do not stall on any single cpu.  Check
	 * all cpus that have the cache bit set, then loop and check again,
	 * until we've cleaned all the bits.
	 */
	value &= ~gd->gd_cpumask;

	while ((mask = value) != 0) {
		while (mask) {
			bit = bsfl(value);
			if (globaldata_find(bit)->gd_spinlock_rd != mtx) {
				value &= ~(1 << bit);
			} else if (exponential_backoff(&backoff)) {
				value = 0;
				break;
			}
			mask &= ~(1 << bit);
		}
	}
	logspin(end, mtx, 'w');
}

/*
 * The cache bit wasn't set for our cpu.  Loop until we can set the bit.
 * As with the spin_lock_rd() inline we need a memory fence after setting
 * gd_spinlock_rd to interlock against exclusive spinlocks waiting for
 * that field to clear.
 */
void
spin_lock_rd_contested(struct spinlock *mtx)
{
	struct exponential_backoff backoff;
	globaldata_t gd = mycpu;
	int value = mtx->lock;

	/*
	 * Shortcut the op if we can just set the cache bit.  This case
	 * occurs when the last lock was an exclusive lock.
	 */
	while ((value & SPINLOCK_EXCLUSIVE) == 0) {
		if (atomic_cmpset_int(&mtx->lock, value, value|gd->gd_cpumask))
			return;
		value = mtx->lock;
	}

	exponential_init(&backoff, mtx);
	++spinlocks_contested1;

	logspin(beg, mtx, 'r');

	while ((value & gd->gd_cpumask) == 0) {
		if (value & SPINLOCK_EXCLUSIVE) {
			gd->gd_spinlock_rd = NULL;
			if (exponential_backoff(&backoff)) {
				gd->gd_spinlock_rd = mtx;
				break;
			}
			gd->gd_spinlock_rd = mtx;
			cpu_mfence();
		} else {
			if (atomic_cmpset_int(&mtx->lock, value, value|gd->gd_cpumask))
				break;
		}
		value = mtx->lock;
	}
	logspin(end, mtx, 'r');
}
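
/*
 * For reference, the shared-lock fast path in <sys/spinlock2.h> that
 * falls back to spin_lock_rd_contested() is approximately as follows.
 * The details are an assumption based on the comments above, not the
 * authoritative inline:
 *
 *	gd->gd_spinlock_rd = mtx;
 *	cpu_mfence();
 *	if ((mtx->lock & gd->gd_cpumask) == 0)
 *		spin_lock_rd_contested(mtx);
 *
 * i.e. a reader advertises itself in gd_spinlock_rd, issues the memory
 * fence discussed above, and only takes the contested path when its
 * cached shared bit is not already set in the lock word.
 */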

/*
 * Handle exponential backoff and indefinite waits.
 *
 * If the system is handling a panic we hand the spinlock over to the caller
 * after 1 second.  After 10 seconds we attempt to print a debugger
 * backtrace.  We also run pending interrupts in order to allow a console
 * break into DDB.
 */
static
int
exponential_backoff(struct exponential_backoff *bo)
{
	sysclock_t count;
	int backoff;

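	/*
	 * When the cpu has a TSC, pseudo-randomize the number of pause
	 * iterations by mixing the TSC with the current thread pointer,
	 * so that cpus contending on the same lock do not back off in
	 * lockstep.
	 */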
#ifdef _RDTSC_SUPPORTED_
	if (cpu_feature & CPUID_TSC) {
		backoff =
		(((u_long)rdtsc() ^ (((u_long)curthread) >> 5)) &
		 (bo->backoff - 1)) + BACKOFF_INITIAL;
	} else
#endif
		backoff = bo->backoff;
	logspin_backoff(bo->mtx, bo->backoff, curthread, backoff);

	/*
	 * Quick backoff
	 */
	for (; backoff; --backoff)
		cpu_pause();
	if (bo->backoff < spinlocks_backoff_limit) {
		bo->backoff <<= 1;
		return (FALSE);
	} else {
		bo->backoff = BACKOFF_INITIAL;
	}

	logspin(bofail, bo->mtx, 'u');

	/*
	 * Indefinite
	 */
	++spinlocks_contested2;
	cpu_spinlock_contested();
	if (bo->nsec == 0) {
		bo->base = sys_cputimer->count();
		bo->nsec = 1;
	}

	count = sys_cputimer->count();
	if (count - bo->base > sys_cputimer->freq) {
		kprintf("spin_lock: %p, indefinite wait!\n", bo->mtx);
		if (panicstr)
			return (TRUE);
#if defined(INVARIANTS)
		if (spin_lock_test_mode) {
			print_backtrace();
			return (TRUE);
		}
#endif
		++bo->nsec;
#if defined(INVARIANTS)
		if (bo->nsec == 11)
			print_backtrace();
#endif
		if (bo->nsec == 60)
			panic("spin_lock: %p, indefinite wait!\n", bo->mtx);
		splz();
		bo->base = count;
	}
	return (FALSE);
}

/*
 * If INVARIANTS is enabled various spinlock timing tests can be run
 * by setting debug.spin_lock_test:
 *
 *	1	Test the indefinite wait code
 *	2	Time the best-case exclusive lock overhead (spin_test_count)
 *	3	Time the best-case shared lock overhead (spin_test_count)
 */
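/*
 * Example invocation (illustrative only), using the sysctls declared
 * below:
 *
 *	sysctl debug.spin_test_count=1000000
 *	sysctl debug.spin_lock_test=2
 */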

#ifdef INVARIANTS

static int spin_test_count = 10000000;
SYSCTL_INT(_debug, OID_AUTO, spin_test_count, CTLFLAG_RW, &spin_test_count, 0, "");

static int
sysctl_spin_lock_test(SYSCTL_HANDLER_ARGS)
{
	struct spinlock mtx;
	int error;
	int value = 0;
	int i;

	if ((error = priv_check(curthread, PRIV_ROOT)) != 0)
		return (error);
	if ((error = SYSCTL_IN(req, &value, sizeof(value))) != 0)
		return (error);

	/*
	 * Indefinite wait test
	 */
	if (value == 1) {
		spin_init(&mtx);
		spin_lock_wr(&mtx);	/* force an indefinite wait */
		spin_lock_test_mode = 1;
		spin_lock_wr(&mtx);
		spin_unlock_wr(&mtx);	/* Clean up the spinlock count */
		spin_unlock_wr(&mtx);
		spin_lock_test_mode = 0;
	}

	/*
	 * Time best-case exclusive spinlocks
	 */
	if (value == 2) {
		globaldata_t gd = mycpu;

		spin_init(&mtx);
		for (i = spin_test_count; i > 0; --i) {
		    spin_lock_wr_quick(gd, &mtx);
		    spin_unlock_wr_quick(gd, &mtx);
		}
	}

	/*
	 * Time best-case shared spinlocks
	 */
	if (value == 3) {
		globaldata_t gd = mycpu;

		spin_init(&mtx);
		for (i = spin_test_count; i > 0; --i) {
		    spin_lock_rd_quick(gd, &mtx);
		    spin_unlock_rd_quick(gd, &mtx);
		}
	}
	return (0);
}

SYSCTL_PROC(_debug, KERN_PROC_ALL, spin_lock_test, CTLFLAG_RW|CTLTYPE_INT,
	0, 0, sysctl_spin_lock_test, "I", "Test spinlock wait code");

#endif	/* INVARIANTS */
#endif	/* SMP */
399