/*
 * Copyright (c) 2003,2004,2009 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * lwkt_token - Implement soft token locks.
 *
 * Tokens are locks which serialize a thread only while the thread is
 * running.  If the thread blocks, all of its tokens are released, then
 * reacquired when the thread resumes.
 *
 * This implementation requires no critical sections or spin locks, but
 * does use atomic compare-and-set and fetch-add operations
 * (atomic_fcmpset_long() and friends) on the token count.
 *
 * Tokens may be recursively acquired by the same thread.  However, the
 * caller must be sure to release such tokens in reverse order.
 */
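
/*
 * Illustrative usage (a sketch, not taken from any particular caller):
 * a token is held across a code region and is implicitly dropped if the
 * thread blocks, so callers must revalidate cached state after any
 * potentially blocking operation:
 *
 *	lwkt_gettoken(&vnode_token);
 *	... examine or modify structures covered by vnode_token ...
 *	... a blocking call here releases and reacquires the token,
 *	... so pointers loaded above must be revalidated afterwards
 *	lwkt_reltoken(&vnode_token);
 */
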
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/ktr.h>
#include <sys/kthread.h>
#include <machine/cpu.h>
#include <sys/lock.h>
#include <sys/spinlock.h>

#include <sys/thread2.h>
#include <sys/spinlock2.h>
#include <sys/mplock2.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <machine/stdarg.h>
#include <machine/smp.h>

#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>
#endif

extern int lwkt_sched_debug;

#ifndef LWKT_NUM_POOL_TOKENS
#define LWKT_NUM_POOL_TOKENS	4001	/* prime number */
#endif

struct lwkt_pool_token {
	struct lwkt_token	token;
} __cachealign;

static struct lwkt_pool_token	pool_tokens[LWKT_NUM_POOL_TOKENS];
struct spinlock		tok_debug_spin =
	SPINLOCK_INITIALIZER(&tok_debug_spin, "tok_debug_spin");

#define TOKEN_STRING	"REF=%p TOK=%p TD=%p"
#define TOKEN_ARGS	lwkt_tokref_t ref, lwkt_token_t tok, struct thread *td
#define CONTENDED_STRING	TOKEN_STRING " (contention started)"
#define UNCONTENDED_STRING	TOKEN_STRING " (contention stopped)"
#if !defined(KTR_TOKENS)
#define	KTR_TOKENS	KTR_ALL
#endif

KTR_INFO_MASTER(tokens);
KTR_INFO(KTR_TOKENS, tokens, fail, 0, TOKEN_STRING, TOKEN_ARGS);
KTR_INFO(KTR_TOKENS, tokens, succ, 1, TOKEN_STRING, TOKEN_ARGS);
#if 0
KTR_INFO(KTR_TOKENS, tokens, release, 2, TOKEN_STRING, TOKEN_ARGS);
KTR_INFO(KTR_TOKENS, tokens, remote, 3, TOKEN_STRING, TOKEN_ARGS);
KTR_INFO(KTR_TOKENS, tokens, reqremote, 4, TOKEN_STRING, TOKEN_ARGS);
KTR_INFO(KTR_TOKENS, tokens, reqfail, 5, TOKEN_STRING, TOKEN_ARGS);
KTR_INFO(KTR_TOKENS, tokens, drain, 6, TOKEN_STRING, TOKEN_ARGS);
KTR_INFO(KTR_TOKENS, tokens, contention_start, 7, CONTENDED_STRING, TOKEN_ARGS);
KTR_INFO(KTR_TOKENS, tokens, contention_stop, 7, UNCONTENDED_STRING, TOKEN_ARGS);
#endif

#define logtoken(name, ref)						\
	KTR_LOG(tokens_ ## name, ref, ref->tr_tok, curthread)

/*
 * Global tokens.  These replace the MP lock for major subsystem locking.
 * These tokens are initially used to lock both global and individual
 * operations.
 *
 * Once individual structures get their own locks these tokens are used
 * only to protect global lists & other variables and to interlock
 * allocations and teardowns and such.
 *
 * The UP initializer causes token acquisition to also acquire the MP lock
 * for maximum compatibility.  The feature may be enabled and disabled at
 * any time; the MP state is copied to the tokref when the token is acquired
 * and will not race against sysctl changes.
 */
struct lwkt_token mp_token = LWKT_TOKEN_INITIALIZER(mp_token);
struct lwkt_token pmap_token = LWKT_TOKEN_INITIALIZER(pmap_token);
struct lwkt_token dev_token = LWKT_TOKEN_INITIALIZER(dev_token);
struct lwkt_token vm_token = LWKT_TOKEN_INITIALIZER(vm_token);
struct lwkt_token vmspace_token = LWKT_TOKEN_INITIALIZER(vmspace_token);
struct lwkt_token kvm_token = LWKT_TOKEN_INITIALIZER(kvm_token);
struct lwkt_token sigio_token = LWKT_TOKEN_INITIALIZER(sigio_token);
struct lwkt_token tty_token = LWKT_TOKEN_INITIALIZER(tty_token);
struct lwkt_token vnode_token = LWKT_TOKEN_INITIALIZER(vnode_token);

static int lwkt_token_spin = 5;
SYSCTL_INT(_lwkt, OID_AUTO, token_spin, CTLFLAG_RW,
    &lwkt_token_spin, 0, "Decontention spin loops");
static int lwkt_token_delay = 0;
SYSCTL_INT(_lwkt, OID_AUTO, token_delay, CTLFLAG_RW,
    &lwkt_token_delay, 0, "Decontention spin delay in ns");

/*
 * The collision count is bumped every time the LWKT scheduler fails
 * to acquire needed tokens in addition to a normal lwkt_gettoken()
 * stall.
 */
SYSCTL_LONG(_lwkt, OID_AUTO, mp_collisions, CTLFLAG_RW,
    &mp_token.t_collisions, 0, "Collision counter of mp_token");
SYSCTL_LONG(_lwkt, OID_AUTO, pmap_collisions, CTLFLAG_RW,
    &pmap_token.t_collisions, 0, "Collision counter of pmap_token");
SYSCTL_LONG(_lwkt, OID_AUTO, dev_collisions, CTLFLAG_RW,
    &dev_token.t_collisions, 0, "Collision counter of dev_token");
SYSCTL_LONG(_lwkt, OID_AUTO, vm_collisions, CTLFLAG_RW,
    &vm_token.t_collisions, 0, "Collision counter of vm_token");
SYSCTL_LONG(_lwkt, OID_AUTO, vmspace_collisions, CTLFLAG_RW,
    &vmspace_token.t_collisions, 0, "Collision counter of vmspace_token");
SYSCTL_LONG(_lwkt, OID_AUTO, kvm_collisions, CTLFLAG_RW,
    &kvm_token.t_collisions, 0, "Collision counter of kvm_token");
SYSCTL_LONG(_lwkt, OID_AUTO, sigio_collisions, CTLFLAG_RW,
    &sigio_token.t_collisions, 0, "Collision counter of sigio_token");
SYSCTL_LONG(_lwkt, OID_AUTO, tty_collisions, CTLFLAG_RW,
    &tty_token.t_collisions, 0, "Collision counter of tty_token");
SYSCTL_LONG(_lwkt, OID_AUTO, vnode_collisions, CTLFLAG_RW,
    &vnode_token.t_collisions, 0, "Collision counter of vnode_token");

int tokens_debug_output;
SYSCTL_INT(_lwkt, OID_AUTO, tokens_debug_output, CTLFLAG_RW,
    &tokens_debug_output, 0, "Generate stack trace N times");

#ifdef DEBUG_LOCKS_LATENCY

static long tokens_add_latency;
SYSCTL_LONG(_debug, OID_AUTO, tokens_add_latency, CTLFLAG_RW,
	    &tokens_add_latency, 0,
	    "Add spinlock latency");

#endif

static int _lwkt_getalltokens_sorted(thread_t td);

/*
 * Acquire the initial mplock
 *
 * (low level boot only)
 */
void
cpu_get_initial_mplock(void)
{
	KKASSERT(mp_token.t_ref == NULL);
	if (lwkt_trytoken(&mp_token) == FALSE)
		panic("cpu_get_initial_mplock");
}

/*
 * Return a pool token given an address.  Use a prime number to reduce
 * overlaps.
 */
static __inline
lwkt_token_t
_lwkt_token_pool_lookup(void *ptr)
{
	u_int i;

	i = (u_int)(uintptr_t)ptr % LWKT_NUM_POOL_TOKENS;
	return (&pool_tokens[i].token);
}
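
/*
 * Illustrative pool-token pattern (a sketch; "obj" is a hypothetical
 * pointer): the pool token for an address is stable for the life of
 * the system, so a structure can be serialized without embedding a
 * token in it.  The same address must be used for lookup and release:
 *
 *	lwkt_getpooltoken(obj);
 *	... manipulate *obj ...
 *	lwkt_relpooltoken(obj);
 *
 * Distinct addresses may hash to the same pool token, which is fine
 * for exclusive use but can livelock shared use (see the warning in
 * lwkt_gettoken_shared()).
 */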

/*
 * Initialize a tokref_t prior to making it visible in the thread's
 * token array.
 */
static __inline
void
_lwkt_tokref_init(lwkt_tokref_t ref, lwkt_token_t tok, thread_t td, long excl)
{
	ref->tr_tok = tok;
	ref->tr_count = excl;
	ref->tr_owner = td;
}

/*
 * Attempt to acquire a shared or exclusive token.  Returns TRUE on success,
 * FALSE on failure.
 *
 * If TOK_EXCLUSIVE is set in mode we are attempting to get an exclusive
 * token, otherwise we are attempting to get a shared token.
 *
 * If TOK_EXCLREQ is set in mode this is a blocking operation, otherwise
 * it is a non-blocking operation (for both exclusive and shared
 * acquisitions).
 */
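/*
 * Layout note (a summary; sys/thread.h is authoritative): tok->t_count
 * packs the TOK_EXCLUSIVE and TOK_EXCLREQ flag bits together with a
 * shared-hold count that advances in units of TOK_INCR, so a single
 * atomic operation on t_count can inspect and transition the exclusive
 * state and the shared count at the same time.
 */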
static __inline
int
_lwkt_trytokref(lwkt_tokref_t ref, thread_t td, long mode)
{
	lwkt_token_t tok;
	lwkt_tokref_t oref;
	long count;

	tok = ref->tr_tok;
	KASSERT(((mode & TOK_EXCLREQ) == 0 ||	/* non-blocking */
		td->td_gd->gd_intr_nesting_level == 0 ||
		panic_cpu_gd == mycpu),
		("Attempt to acquire token %p not already "
		"held in hard code section", tok));

	if (mode & TOK_EXCLUSIVE) {
		/*
		 * Attempt to get an exclusive token
		 */
		count = tok->t_count;

		for (;;) {
			oref = tok->t_ref;	/* can be NULL */
			cpu_ccfence();
			if ((count & ~TOK_EXCLREQ) == 0) {
				/*
				 * It is possible to get the exclusive bit.
				 * We must clear TOK_EXCLREQ on successful
				 * acquisition.
				 */
				if (atomic_fcmpset_long(&tok->t_count, &count,
						        (count & ~TOK_EXCLREQ) |
						        TOK_EXCLUSIVE)) {
					KKASSERT(tok->t_ref == NULL);
					tok->t_ref = ref;
					return TRUE;
				}
				/* retry */
			} else if ((count & TOK_EXCLUSIVE) &&
				   oref >= &td->td_toks_base &&
				   oref < td->td_toks_stop) {
				/*
				 * Our thread already holds the exclusive
				 * bit, so we treat this tokref as a shared
				 * token (sorta) to make the token release
				 * code easier.
				 *
				 * NOTE: oref cannot race above if it
				 *	 happens to be ours, so we're good.
				 *	 But we must still have a stable
				 *	 variable for both parts of the
				 *	 comparison.
				 *
				 * NOTE: Since we already have an exclusive
				 *	 lock and don't need to check EXCLREQ
				 *	 we can just use an atomic_add here.
				 */
				atomic_add_long(&tok->t_count, TOK_INCR);
				ref->tr_count &= ~TOK_EXCLUSIVE;
				return TRUE;
			} else if ((mode & TOK_EXCLREQ) &&
				   (count & TOK_EXCLREQ) == 0) {
				/*
				 * Unable to get the exclusive bit but being
				 * asked to set the exclusive-request bit.
				 * Since we are going to retry anyway just
				 * set the bit unconditionally.
				 */
				atomic_set_long(&tok->t_count, TOK_EXCLREQ);
				return FALSE;
			} else {
				/*
				 * Unable to get the exclusive bit and not
				 * being asked to set the exclusive-request
				 * bit (aka lwkt_trytoken()), or EXCLREQ was
				 * already set.
				 */
				cpu_pause();
				return FALSE;
			}
			/* retry */
		}
	} else {
		/*
		 * Attempt to get a shared token.  Note that TOK_EXCLREQ
		 * for shared tokens simply means the caller intends to
		 * block.  We never actually set the bit in tok->t_count.
		 */
		count = tok->t_count;

		for (;;) {
			oref = tok->t_ref;	/* can be NULL */
			cpu_ccfence();
			if ((count & (TOK_EXCLUSIVE/*|TOK_EXCLREQ*/)) == 0) {
				/*
				 * It may be possible to get the token shared.
				 */
				if ((atomic_fetchadd_long(&tok->t_count,
							  TOK_INCR) &
				     TOK_EXCLUSIVE) == 0) {
					return TRUE;
				}
				count = atomic_fetchadd_long(&tok->t_count,
							     -TOK_INCR);
				count -= TOK_INCR;
				/* retry */
			} else if ((count & TOK_EXCLUSIVE) &&
				   oref >= &td->td_toks_base &&
				   oref < td->td_toks_stop) {
				/*
				 * We own the exclusive bit on the token so
				 * we can in fact also get it shared.
				 */
				atomic_add_long(&tok->t_count, TOK_INCR);
				return TRUE;
			} else {
				/*
				 * We failed to get the token shared.
				 */
				return FALSE;
			}
			/* retry */
		}
	}
}

static __inline
int
_lwkt_trytokref_spin(lwkt_tokref_t ref, thread_t td, long mode)
{
	int spin;

	if (_lwkt_trytokref(ref, td, mode)) {
#ifdef DEBUG_LOCKS_LATENCY
		long j;
		for (j = tokens_add_latency; j > 0; --j)
			cpu_ccfence();
#endif
		return TRUE;
	}
	for (spin = lwkt_token_spin; spin > 0; --spin) {
		if (lwkt_token_delay)
			tsc_delay(lwkt_token_delay);
		else
			cpu_pause();
		if (_lwkt_trytokref(ref, td, mode)) {
#ifdef DEBUG_LOCKS_LATENCY
			long j;
			for (j = tokens_add_latency; j > 0; --j)
				cpu_ccfence();
#endif
			return TRUE;
		}
	}
	return FALSE;
}

/*
 * Release a token that we hold.
 */
static __inline
void
_lwkt_reltokref(lwkt_tokref_t ref, thread_t td)
{
	lwkt_token_t tok;
	long count;

	tok = ref->tr_tok;
	count = tok->t_count;

	for (;;) {
		cpu_ccfence();
		if (tok->t_ref == ref) {
			/*
			 * We are an exclusive holder.  We must clear t_ref
			 * before we clear the TOK_EXCLUSIVE bit.  If we are
			 * unable to clear the bit we must restore
			 * tok->t_ref.
			 */
			KKASSERT(count & TOK_EXCLUSIVE);
			tok->t_ref = NULL;
			if (atomic_fcmpset_long(&tok->t_count, &count,
					        count & ~TOK_EXCLUSIVE)) {
				return;
			}
			tok->t_ref = ref;
			/* retry */
		} else {
			/*
			 * We are a shared holder.
			 */
			KKASSERT(count & TOK_COUNTMASK);
			if (atomic_fcmpset_long(&tok->t_count, &count,
						count - TOK_INCR)) {
				return;
			}
			/* retry */
		}
		/* retry */
	}
}

/*
 * Obtain all the tokens required by the specified thread on the current
 * cpu, return 0 on failure and non-zero on success.  If a failure occurs
 * any partially acquired tokens will be released prior to return.
 *
 * lwkt_getalltokens is called by the LWKT scheduler to re-acquire all
 * tokens that the thread had to release when it switched away.
 *
 * If spinning is non-zero this function acquires the tokens in a particular
 * order to deal with potential deadlocks.  We simply use address order in
 * that case.
 *
 * Called from a critical section.
 */
int
lwkt_getalltokens(thread_t td, int spinning)
{
	lwkt_tokref_t scan;
	lwkt_token_t tok;

	if (spinning)
		return(_lwkt_getalltokens_sorted(td));

	/*
	 * Acquire tokens in forward order, assign or validate tok->t_ref.
	 */
	for (scan = &td->td_toks_base; scan < td->td_toks_stop; ++scan) {
		tok = scan->tr_tok;
		for (;;) {
			/*
			 * Only try really hard on the last token
			 */
			if (scan == td->td_toks_stop - 1) {
			    if (_lwkt_trytokref_spin(scan, td, scan->tr_count))
				    break;
			} else {
			    if (_lwkt_trytokref(scan, td, scan->tr_count))
				    break;
			}

			/*
			 * Otherwise we failed to acquire all the tokens.
			 * Release whatever we did get.
			 */
			KASSERT(tok->t_desc,
				("token %p is not initialized", tok));
			td->td_gd->gd_cnt.v_lock_name[0] = 't';
			strncpy(td->td_gd->gd_cnt.v_lock_name + 1,
				tok->t_desc,
				sizeof(td->td_gd->gd_cnt.v_lock_name) - 2);
			if (lwkt_sched_debug > 0) {
				--lwkt_sched_debug;
				kprintf("toka %p %s %s\n",
					tok, tok->t_desc, td->td_comm);
			}
			td->td_wmesg = tok->t_desc;
			++tok->t_collisions;
			while (--scan >= &td->td_toks_base)
				_lwkt_reltokref(scan, td);
			return(FALSE);
		}
	}
	return (TRUE);
}

/*
 * Release all tokens owned by the specified thread on the current cpu.
 *
 * This code is really simple.  Even in cases where we own all the tokens,
 * note that t_ref may not match the scan for recursively held tokens which
 * are held deeper in the stack, or for the case where a lwkt_getalltokens()
 * failed.
 *
 * Tokens are released in reverse order to reduce chasing race failures.
 *
 * Called from a critical section.
 */
void
lwkt_relalltokens(thread_t td)
{
	lwkt_tokref_t scan;

	/*
	 * Weird order is to try to avoid a panic loop
	 */
	if (td->td_toks_have) {
		scan = td->td_toks_have;
		td->td_toks_have = NULL;
	} else {
		scan = td->td_toks_stop;
	}
	while (--scan >= &td->td_toks_base)
		_lwkt_reltokref(scan, td);
}

/*
 * This is the decontention version of lwkt_getalltokens().  The tokens are
 * acquired in address-sorted order to deal with any deadlocks.  Ultimately
 * token failures will spin into the scheduler and get here.
 *
 * Called from critical section
 */
static
int
_lwkt_getalltokens_sorted(thread_t td)
{
	lwkt_tokref_t sort_array[LWKT_MAXTOKENS];
	lwkt_tokref_t scan;
	lwkt_token_t tok;
	int i;
	int j;
	int n;

	/*
	 * Sort the token array.  Yah yah, I know this isn't fun.
	 *
	 * NOTE: Recursively acquired tokens are ordered the same as in the
	 *	 td_toks_array so we can always get the earliest one first.
	 */
	i = 0;
	scan = &td->td_toks_base;
	while (scan < td->td_toks_stop) {
		for (j = 0; j < i; ++j) {
			if (scan->tr_tok < sort_array[j]->tr_tok)
				break;
		}
		if (j != i) {
			bcopy(sort_array + j, sort_array + j + 1,
			      (i - j) * sizeof(lwkt_tokref_t));
		}
		sort_array[j] = scan;
		++scan;
		++i;
	}
	n = i;

	/*
	 * Acquire tokens in forward order, assign or validate tok->t_ref.
	 */
	for (i = 0; i < n; ++i) {
		scan = sort_array[i];
		tok = scan->tr_tok;
		for (;;) {
			/*
			 * Only try really hard on the last token
			 */
			if (scan == td->td_toks_stop - 1) {
			    if (_lwkt_trytokref_spin(scan, td, scan->tr_count))
				    break;
			} else {
			    if (_lwkt_trytokref(scan, td, scan->tr_count))
				    break;
			}

			/*
			 * Otherwise we failed to acquire all the tokens.
			 * Release whatever we did get.
			 */
			td->td_gd->gd_cnt.v_lock_name[0] = 't';
			strncpy(td->td_gd->gd_cnt.v_lock_name + 1,
				tok->t_desc,
				sizeof(td->td_gd->gd_cnt.v_lock_name) - 2);
			if (lwkt_sched_debug > 0) {
				--lwkt_sched_debug;
				kprintf("tokb %p %s %s\n",
					tok, tok->t_desc, td->td_comm);
			}
			td->td_wmesg = tok->t_desc;
			++tok->t_collisions;
			while (--i >= 0) {
				scan = sort_array[i];
				_lwkt_reltokref(scan, td);
			}
			return(FALSE);
		}
	}

	/*
	 * We were successful, there is no need for another core to signal
	 * us.
	 */
	return (TRUE);
}

/*
 * Get a serializing token.  This routine can block.
 */
void
lwkt_gettoken(lwkt_token_t tok)
{
	thread_t td = curthread;
	lwkt_tokref_t ref;

	ref = td->td_toks_stop;
	KKASSERT(ref < &td->td_toks_end);
	++td->td_toks_stop;
	cpu_ccfence();
	_lwkt_tokref_init(ref, tok, td, TOK_EXCLUSIVE|TOK_EXCLREQ);

#ifdef DEBUG_LOCKS
	/*
	 * Taking an exclusive token after holding it shared will
	 * livelock.  Scan for that case and assert.
	 */
	lwkt_tokref_t tk;
	int found = 0;
	for (tk = &td->td_toks_base; tk < ref; tk++) {
		if (tk->tr_tok != tok)
			continue;

		found++;
		if (tk->tr_count & TOK_EXCLUSIVE)
			goto good;
	}
	/* We found only shared instances of this token if found > 0 here */
	KASSERT((found == 0), ("Token %p s/x livelock", tok));
good:
#endif

	if (_lwkt_trytokref_spin(ref, td, TOK_EXCLUSIVE|TOK_EXCLREQ))
		return;

	/*
	 * Give up running if we can't acquire the token right now.
	 *
	 * Since the tokref is already active the scheduler now
	 * takes care of acquisition, so we need only call
	 * lwkt_switch().
	 *
	 * Since we failed this was not a recursive token so upon
	 * return tr_tok->t_ref should be assigned to this specific
	 * ref.
	 */
	td->td_wmesg = tok->t_desc;
	++tok->t_collisions;
	logtoken(fail, ref);
	td->td_toks_have = td->td_toks_stop - 1;

	if (tokens_debug_output > 0) {
		--tokens_debug_output;
		spin_lock(&tok_debug_spin);
		kprintf("Excl Token thread %p %s %s\n",
			td, tok->t_desc, td->td_comm);
		print_backtrace(6);
		kprintf("\n");
		spin_unlock(&tok_debug_spin);
	}

	lwkt_switch();
	logtoken(succ, ref);
	KKASSERT(tok->t_ref == ref);
}

/*
 * Similar to gettoken but we acquire a shared token instead of an exclusive
 * token.
 */
void
lwkt_gettoken_shared(lwkt_token_t tok)
{
	thread_t td = curthread;
	lwkt_tokref_t ref;

	ref = td->td_toks_stop;
	KKASSERT(ref < &td->td_toks_end);
	++td->td_toks_stop;
	cpu_ccfence();
	_lwkt_tokref_init(ref, tok, td, TOK_EXCLREQ);

#ifdef DEBUG_LOCKS
	/*
	 * Taking a pool token in shared mode is a bad idea; other
	 * addresses deeper in the call stack may hash to the same pool
	 * token and you may end up with an exclusive-shared livelock.
	 * Warn when this occurs.
	 */
	if ((tok >= &pool_tokens[0].token) &&
	    (tok < &pool_tokens[LWKT_NUM_POOL_TOKENS].token))
		kprintf("Warning! Taking pool token %p in shared mode\n", tok);
#endif

	if (_lwkt_trytokref_spin(ref, td, TOK_EXCLREQ))
		return;

	/*
	 * Give up running if we can't acquire the token right now.
	 *
	 * Since the tokref is already active the scheduler now
	 * takes care of acquisition, so we need only call
	 * lwkt_switch().
	 *
	 * Since we failed this was not a recursive token so upon
	 * return tr_tok->t_ref should be assigned to this specific
	 * ref.
	 */
	td->td_wmesg = tok->t_desc;
	++tok->t_collisions;
	logtoken(fail, ref);
	td->td_toks_have = td->td_toks_stop - 1;

	if (tokens_debug_output > 0) {
		--tokens_debug_output;
		spin_lock(&tok_debug_spin);
		kprintf("Shar Token thread %p %s %s\n",
			td, tok->t_desc, td->td_comm);
		print_backtrace(6);
		kprintf("\n");
		spin_unlock(&tok_debug_spin);
	}

	lwkt_switch();
	logtoken(succ, ref);
}
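
/*
 * Illustrative shared-access pattern (a sketch): any number of threads
 * may hold the same token shared at once for read-mostly access, while
 * writers use lwkt_gettoken() to obtain it exclusively:
 *
 *	lwkt_gettoken_shared(&vm_token);
 *	... read structures covered by vm_token ...
 *	lwkt_reltoken(&vm_token);
 */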

/*
 * Attempt to acquire a token, return TRUE on success, FALSE on failure.
 *
 * We set up the tokref in case we actually get the token (if we switch later
 * it becomes mandatory, so we set TOK_EXCLREQ), but we call trytokref without
 * TOK_EXCLREQ in case we fail.
 */
int
lwkt_trytoken(lwkt_token_t tok)
{
	thread_t td = curthread;
	lwkt_tokref_t ref;

	ref = td->td_toks_stop;
	KKASSERT(ref < &td->td_toks_end);
	++td->td_toks_stop;
	cpu_ccfence();
	_lwkt_tokref_init(ref, tok, td, TOK_EXCLUSIVE|TOK_EXCLREQ);

	if (_lwkt_trytokref(ref, td, TOK_EXCLUSIVE))
		return TRUE;

	/*
	 * Failed, unpend the request
	 */
	cpu_ccfence();
	--td->td_toks_stop;
	++tok->t_collisions;
	return FALSE;
}
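
/*
 * Illustrative non-blocking pattern (a sketch): lwkt_trytoken() never
 * blocks, so it is usable where sleeping is not allowed, but the caller
 * must be prepared for failure:
 *
 *	if (lwkt_trytoken(&tty_token)) {
 *		... protected work ...
 *		lwkt_reltoken(&tty_token);
 *	} else {
 *		... fall back or retry later ...
 *	}
 */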

lwkt_token_t
lwkt_getpooltoken(void *ptr)
{
	lwkt_token_t tok;

	tok = _lwkt_token_pool_lookup(ptr);
	lwkt_gettoken(tok);
	return (tok);
}

/*
 * Release a serializing token.
 *
 * WARNING!  All tokens must be released in reverse order.  This will be
 *	     asserted.
 */
void
lwkt_reltoken(lwkt_token_t tok)
{
	thread_t td = curthread;
	lwkt_tokref_t ref;

	/*
	 * Remove ref from thread token list and assert that it matches
	 * the token passed in.  Tokens must be released in reverse order.
	 */
	ref = td->td_toks_stop - 1;
	KKASSERT(ref >= &td->td_toks_base && ref->tr_tok == tok);
	_lwkt_reltokref(ref, td);
	cpu_sfence();
	td->td_toks_stop = ref;
}

/*
 * It is faster for users of lwkt_getpooltoken() to use the returned
 * token and just call lwkt_reltoken(), but for convenience we provide
 * this function which looks the token up based on the address.
 */
void
lwkt_relpooltoken(void *ptr)
{
	lwkt_token_t tok = _lwkt_token_pool_lookup(ptr);
	lwkt_reltoken(tok);
}

/*
 * Return a count of the number of token refs the thread has to the
 * specified token, whether it currently owns the token or not.
 */
int
lwkt_cnttoken(lwkt_token_t tok, thread_t td)
{
	lwkt_tokref_t scan;
	int count = 0;

	for (scan = &td->td_toks_base; scan < td->td_toks_stop; ++scan) {
		if (scan->tr_tok == tok)
			++count;
	}
	return(count);
}

/*
 * Pool tokens are used to provide a type-stable serializing token
 * pointer that does not race against disappearing data structures.
 *
 * This routine is called in early boot just after we set up the BSP's
 * globaldata structure.
 */
void
lwkt_token_pool_init(void)
{
	int i;

	for (i = 0; i < LWKT_NUM_POOL_TOKENS; ++i)
		lwkt_token_init(&pool_tokens[i].token, "pool");
}

lwkt_token_t
lwkt_token_pool_lookup(void *ptr)
{
	return (_lwkt_token_pool_lookup(ptr));
}

/*
 * Initialize a token.
 */
void
lwkt_token_init(lwkt_token_t tok, const char *desc)
{
	tok->t_count = 0;
	tok->t_ref = NULL;
	tok->t_collisions = 0;
	tok->t_desc = desc;
}
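
/*
 * Illustrative initialization (a sketch; "example_token", "example_softc"
 * and "sc_token" are hypothetical names): tokens may be initialized
 * statically or at runtime before first use:
 *
 *	static struct lwkt_token example_token =
 *		LWKT_TOKEN_INITIALIZER(example_token);
 *
 *	struct example_softc *sc;
 *	...
 *	lwkt_token_init(&sc->sc_token, "examp");
 *	...
 *	lwkt_token_uninit(&sc->sc_token);	... currently a no-op
 */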

void
lwkt_token_uninit(lwkt_token_t tok)
{
	/* empty */
}

/*
 * Exchange the two most recent tokens on the tokref stack.  This allows
 * you to release a token out of order.
 *
 * We have to be careful about the case where the top two tokens are
 * the same token.  In this case tok->t_ref will point to the deeper
 * ref and must remain pointing to the deeper ref.  If we were to swap
 * it the first release would clear the token even though a second
 * ref is still present.
 *
 * Only exclusively held tokens contain a reference to the tokref which
 * has to be flipped along with the swap.
 */
void
lwkt_token_swap(void)
{
	lwkt_tokref_t ref1, ref2;
	lwkt_token_t tok1, tok2;
	long count1, count2;
	thread_t td = curthread;

	crit_enter();

	ref1 = td->td_toks_stop - 1;
	ref2 = td->td_toks_stop - 2;
	KKASSERT(ref1 >= &td->td_toks_base);
	KKASSERT(ref2 >= &td->td_toks_base);

	tok1 = ref1->tr_tok;
	tok2 = ref2->tr_tok;
	count1 = ref1->tr_count;
	count2 = ref2->tr_count;

	if (tok1 != tok2) {
		ref1->tr_tok = tok2;
		ref1->tr_count = count2;
		ref2->tr_tok = tok1;
		ref2->tr_count = count1;
		if (tok1->t_ref == ref1)
			tok1->t_ref = ref2;
		if (tok2->t_ref == ref2)
			tok2->t_ref = ref1;
	}

	crit_exit();
}
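
/*
 * Illustrative out-of-order release (a sketch; tokA and tokB are
 * hypothetical): tokens normally unwind LIFO, so to drop tokA while
 * keeping tokB, swap the two most recent refs first:
 *
 *	lwkt_gettoken(&tokA);
 *	lwkt_gettoken(&tokB);
 *	lwkt_token_swap();	... the top of the stack is now tokA
 *	lwkt_reltoken(&tokA);	... tokB remains held
 */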

#ifdef DDB
DB_SHOW_COMMAND(tokens, db_tok_all)
{
	struct lwkt_token *tok, **ptr;
	struct lwkt_token *toklist[16] = {
		&mp_token,
		&pmap_token,
		&dev_token,
		&vm_token,
		&vmspace_token,
		&kvm_token,
		&sigio_token,
		&tty_token,
		&vnode_token,
		NULL
	};

	ptr = toklist;
	for (tok = *ptr; tok; tok = *(++ptr)) {
		db_printf("tok=%p tr_owner=%p t_collisions=%ld t_desc=%s\n",
		    tok,
		    (tok->t_ref ? tok->t_ref->tr_owner : NULL),
		    tok->t_collisions, tok->t_desc);
	}
}
#endif /* DDB */