xref: /dragonfly/sys/kern/lwkt_token.c (revision e6975a4e)
1 /*
2  * Copyright (c) 2003,2004,2009 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * lwkt_token - Implement soft token locks.
37  *
38  * Tokens are locks which serialize a thread only while the thread is
39  * running.  If the thread blocks all tokens are released, then reacquired
40  * when the thread resumes.
41  *
42  * This implementation requires no critical sections or spin locks, but
43  * does use atomic_cmpset_ptr().
44  *
45  * Tokens may be recursively acquired by the same thread.  However the
46  * caller must be sure to release such tokens in reverse order.
47  */
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/kernel.h>
51 #include <sys/proc.h>
52 #include <sys/rtprio.h>
53 #include <sys/queue.h>
54 #include <sys/sysctl.h>
55 #include <sys/ktr.h>
56 #include <sys/kthread.h>
57 #include <machine/cpu.h>
58 #include <sys/lock.h>
59 #include <sys/spinlock.h>
60 
61 #include <sys/thread2.h>
62 #include <sys/spinlock2.h>
63 #include <sys/mplock2.h>
64 
65 #include <vm/vm.h>
66 #include <vm/vm_param.h>
67 #include <vm/vm_kern.h>
68 #include <vm/vm_object.h>
69 #include <vm/vm_page.h>
70 #include <vm/vm_map.h>
71 #include <vm/vm_pager.h>
72 #include <vm/vm_extern.h>
73 #include <vm/vm_zone.h>
74 
75 #include <machine/stdarg.h>
76 #include <machine/smp.h>
77 
78 #include "opt_ddb.h"
79 #ifdef DDB
80 #include <ddb/ddb.h>
81 #endif
82 
83 extern int lwkt_sched_debug;
84 
85 #ifndef LWKT_NUM_POOL_TOKENS
86 #define LWKT_NUM_POOL_TOKENS	16661
87 #endif
88 
89 struct lwkt_pool_token {
90 	struct lwkt_token	token;
91 } __cachealign;
92 
93 static struct lwkt_pool_token	pool_tokens[LWKT_NUM_POOL_TOKENS];
94 struct spinlock tok_debug_spin = SPINLOCK_INITIALIZER(&tok_debug_spin,
95 						      "tok_debug_spin");
96 
97 #define TOKEN_STRING	"REF=%p TOK=%p TD=%p"
98 #define TOKEN_ARGS	lwkt_tokref_t ref, lwkt_token_t tok, struct thread *td
99 #define CONTENDED_STRING	TOKEN_STRING " (contention started)"
100 #define UNCONTENDED_STRING	TOKEN_STRING " (contention stopped)"
101 #if !defined(KTR_TOKENS)
102 #define	KTR_TOKENS	KTR_ALL
103 #endif
104 
105 KTR_INFO_MASTER(tokens);
106 KTR_INFO(KTR_TOKENS, tokens, fail, 0, TOKEN_STRING, TOKEN_ARGS);
107 KTR_INFO(KTR_TOKENS, tokens, succ, 1, TOKEN_STRING, TOKEN_ARGS);
108 #if 0
109 KTR_INFO(KTR_TOKENS, tokens, release, 2, TOKEN_STRING, TOKEN_ARGS);
110 KTR_INFO(KTR_TOKENS, tokens, remote, 3, TOKEN_STRING, TOKEN_ARGS);
111 KTR_INFO(KTR_TOKENS, tokens, reqremote, 4, TOKEN_STRING, TOKEN_ARGS);
112 KTR_INFO(KTR_TOKENS, tokens, reqfail, 5, TOKEN_STRING, TOKEN_ARGS);
113 KTR_INFO(KTR_TOKENS, tokens, drain, 6, TOKEN_STRING, TOKEN_ARGS);
114 KTR_INFO(KTR_TOKENS, tokens, contention_start, 7, CONTENDED_STRING, TOKEN_ARGS);
115 KTR_INFO(KTR_TOKENS, tokens, contention_stop, 7, UNCONTENDED_STRING, TOKEN_ARGS);
116 #endif
117 
118 #define logtoken(name, ref)						\
119 	KTR_LOG(tokens_ ## name, ref, ref->tr_tok, curthread)
120 
121 /*
122  * Global tokens.  These replace the MP lock for major subsystem locking.
123  * These tokens are initially used to lockup both global and individual
124  * operations.
125  *
126  * Once individual structures get their own locks these tokens are used
127  * only to protect global lists & other variables and to interlock
128  * allocations and teardowns and such.
129  *
130  * The UP initializer causes token acquisition to also acquire the MP lock
131  * for maximum compatibility.  The feature may be enabled and disabled at
132  * any time, the MP state is copied to the tokref when the token is acquired
133  * and will not race against sysctl changes.
134  */
135 struct lwkt_token mp_token = LWKT_TOKEN_INITIALIZER(mp_token);
136 struct lwkt_token pmap_token = LWKT_TOKEN_INITIALIZER(pmap_token);
137 struct lwkt_token dev_token = LWKT_TOKEN_INITIALIZER(dev_token);
138 struct lwkt_token vm_token = LWKT_TOKEN_INITIALIZER(vm_token);
139 struct lwkt_token vmspace_token = LWKT_TOKEN_INITIALIZER(vmspace_token);
140 struct lwkt_token kvm_token = LWKT_TOKEN_INITIALIZER(kvm_token);
141 struct lwkt_token sigio_token = LWKT_TOKEN_INITIALIZER(sigio_token);
142 struct lwkt_token tty_token = LWKT_TOKEN_INITIALIZER(tty_token);
143 struct lwkt_token vnode_token = LWKT_TOKEN_INITIALIZER(vnode_token);
144 
145 /*
146  * Exponential backoff (exclusive tokens) and TSC windowing (shared tokens)
147  * parameters.  Remember that tokens backoff to the scheduler, large values
148  * not recommended.
149  */
150 static int token_backoff_max __cachealign = 4096;
151 SYSCTL_INT(_lwkt, OID_AUTO, token_backoff_max, CTLFLAG_RW,
152     &token_backoff_max, 0, "Tokens exponential backoff");
153 static int token_window_shift __cachealign = 8;
154 SYSCTL_INT(_lwkt, OID_AUTO, token_window_shift, CTLFLAG_RW,
155     &token_window_shift, 0, "Tokens TSC windowing shift");
156 
157 /*
158  * The collision count is bumped every time the LWKT scheduler fails
159  * to acquire needed tokens in addition to a normal lwkt_gettoken()
160  * stall.
161  */
162 SYSCTL_LONG(_lwkt, OID_AUTO, mp_collisions, CTLFLAG_RW,
163     &mp_token.t_collisions, 0, "Collision counter of mp_token");
164 SYSCTL_LONG(_lwkt, OID_AUTO, pmap_collisions, CTLFLAG_RW,
165     &pmap_token.t_collisions, 0, "Collision counter of pmap_token");
166 SYSCTL_LONG(_lwkt, OID_AUTO, dev_collisions, CTLFLAG_RW,
167     &dev_token.t_collisions, 0, "Collision counter of dev_token");
168 SYSCTL_LONG(_lwkt, OID_AUTO, vm_collisions, CTLFLAG_RW,
169     &vm_token.t_collisions, 0, "Collision counter of vm_token");
170 SYSCTL_LONG(_lwkt, OID_AUTO, vmspace_collisions, CTLFLAG_RW,
171     &vmspace_token.t_collisions, 0, "Collision counter of vmspace_token");
172 SYSCTL_LONG(_lwkt, OID_AUTO, kvm_collisions, CTLFLAG_RW,
173     &kvm_token.t_collisions, 0, "Collision counter of kvm_token");
174 SYSCTL_LONG(_lwkt, OID_AUTO, sigio_collisions, CTLFLAG_RW,
175     &sigio_token.t_collisions, 0, "Collision counter of sigio_token");
176 SYSCTL_LONG(_lwkt, OID_AUTO, tty_collisions, CTLFLAG_RW,
177     &tty_token.t_collisions, 0, "Collision counter of tty_token");
178 SYSCTL_LONG(_lwkt, OID_AUTO, vnode_collisions, CTLFLAG_RW,
179     &vnode_token.t_collisions, 0, "Collision counter of vnode_token");
180 
181 int tokens_debug_output;
182 SYSCTL_INT(_lwkt, OID_AUTO, tokens_debug_output, CTLFLAG_RW,
183     &tokens_debug_output, 0, "Generate stack trace N times");
184 
185 static int _lwkt_getalltokens_sorted(thread_t td);
186 
187 /*
188  * Acquire the initial mplock
189  *
190  * (low level boot only)
191  */
192 void
193 cpu_get_initial_mplock(void)
194 {
195 	KKASSERT(mp_token.t_ref == NULL);
196 	if (lwkt_trytoken(&mp_token) == FALSE)
197 		panic("cpu_get_initial_mplock");
198 }
199 
200 /*
201  * Return a pool token given an address.  Use a prime number to reduce
202  * overlaps.
203  */
204 static __inline
205 lwkt_token_t
206 _lwkt_token_pool_lookup(void *ptr)
207 {
208 	uint32_t i;
209 
210 	i = (uint32_t)(uintptr_t)ptr % LWKT_NUM_POOL_TOKENS;
211 	return (&pool_tokens[i].token);
212 }
213 
214 /*
215  * Initialize a tokref_t prior to making it visible in the thread's
216  * token array.
217  */
218 static __inline
219 void
220 _lwkt_tokref_init(lwkt_tokref_t ref, lwkt_token_t tok, thread_t td, long excl)
221 {
222 	ref->tr_tok = tok;
223 	ref->tr_count = excl;
224 	ref->tr_owner = td;
225 }
226 
227 /*
228  * Attempt to acquire a shared or exclusive token.  Returns TRUE on success,
229  * FALSE on failure.
230  *
231  * If TOK_EXCLUSIVE is set in mode we are attempting to get an exclusive
232  * token, otherwise are attempting to get a shared token.
233  *
234  * If TOK_EXCLREQ is set in mode this is a blocking operation, otherwise
235  * it is a non-blocking operation (for both exclusive or shared acquisions).
236  */
237 static __inline
238 int
239 _lwkt_trytokref(lwkt_tokref_t ref, thread_t td, long mode)
240 {
241 	lwkt_token_t tok;
242 	lwkt_tokref_t oref;
243 	long count;
244 
245 	tok = ref->tr_tok;
246 	KASSERT(((mode & TOK_EXCLREQ) == 0 ||	/* non blocking */
247 		td->td_gd->gd_intr_nesting_level == 0 ||
248 		panic_cpu_gd == mycpu),
249 		("Attempt to acquire token %p not already "
250 		"held in hard code section", tok));
251 
252 	if (mode & TOK_EXCLUSIVE) {
253 		/*
254 		 * Attempt to get an exclusive token
255 		 */
256 		count = tok->t_count;
257 
258 		for (;;) {
259 			oref = tok->t_ref;	/* can be NULL */
260 			cpu_ccfence();
261 			if ((count & ~TOK_EXCLREQ) == 0) {
262 				/*
263 				 * It is possible to get the exclusive bit.
264 				 * We must clear TOK_EXCLREQ on successful
265 				 * acquisition.
266 				 */
267 				if (atomic_fcmpset_long(&tok->t_count, &count,
268 						        (count & ~TOK_EXCLREQ) |
269 						        TOK_EXCLUSIVE)) {
270 					KKASSERT(tok->t_ref == NULL);
271 					tok->t_ref = ref;
272 					return TRUE;
273 				}
274 				/* retry */
275 			} else if ((count & TOK_EXCLUSIVE) &&
276 				   oref >= &td->td_toks_base &&
277 				   oref < td->td_toks_stop) {
278 				/*
279 				 * Our thread already holds the exclusive
280 				 * bit, we treat this tokref as a shared
281 				 * token (sorta) to make the token release
282 				 * code easier.  Treating this as a shared
283 				 * token allows us to simply increment the
284 				 * count field.
285 				 *
286 				 * NOTE: oref cannot race above if it
287 				 *	 happens to be ours, so we're good.
288 				 *	 But we must still have a stable
289 				 *	 variable for both parts of the
290 				 *	 comparison.
291 				 *
292 				 * NOTE: Since we already have an exclusive
293 				 *	 lock and don't need to check EXCLREQ
294 				 *	 we can just use an atomic_add here
295 				 */
296 				atomic_add_long(&tok->t_count, TOK_INCR);
297 				ref->tr_count &= ~TOK_EXCLUSIVE;
298 				return TRUE;
299 			} else if ((mode & TOK_EXCLREQ) &&
300 				   (count & TOK_EXCLREQ) == 0) {
301 				/*
302 				 * Unable to get the exclusive bit but being
303 				 * asked to set the exclusive-request bit.
304 				 * Since we are going to retry anyway just
305 				 * set the bit unconditionally.
306 				 */
307 				atomic_set_long(&tok->t_count, TOK_EXCLREQ);
308 				return FALSE;
309 			} else {
310 				/*
311 				 * Unable to get the exclusive bit and not
312 				 * being asked to set the exclusive-request
313 				 * (aka lwkt_trytoken()), or EXCLREQ was
314 				 * already set.
315 				 */
316 				cpu_pause();
317 				return FALSE;
318 			}
319 			/* retry */
320 		}
321 	} else {
322 		/*
323 		 * Attempt to get a shared token.  Note that TOK_EXCLREQ
324 		 * for shared tokens simply means the caller intends to
325 		 * block.  We never actually set the bit in tok->t_count.
326 		 *
327 		 * Due to the token's no-deadlock guarantee, and complications
328 		 * created by the sorted reacquisition code, we can only
329 		 * give exclusive requests priority over shared requests
330 		 * in situations where the thread holds only one token.
331 		 */
332 		count = tok->t_count;
333 
334 		for (;;) {
335 			oref = tok->t_ref;	/* can be NULL */
336 			cpu_ccfence();
337 			if ((count & (TOK_EXCLUSIVE|mode)) == 0 ||
338 			    ((count & TOK_EXCLUSIVE) == 0 &&
339 			    td->td_toks_stop != &td->td_toks_base + 1)
340 			) {
341 				/*
342 				 * It may be possible to get the token shared.
343 				 */
344 				if ((atomic_fetchadd_long(&tok->t_count, TOK_INCR) & TOK_EXCLUSIVE) == 0) {
345 					return TRUE;
346 				}
347 				count = atomic_fetchadd_long(&tok->t_count,
348 							     -TOK_INCR);
349 				count -= TOK_INCR;
350 				/* retry */
351 			} else if ((count & TOK_EXCLUSIVE) &&
352 				   oref >= &td->td_toks_base &&
353 				   oref < td->td_toks_stop) {
354 				/*
355 				 * We own the exclusive bit on the token so
356 				 * we can in fact also get it shared.
357 				 */
358 				atomic_add_long(&tok->t_count, TOK_INCR);
359 				return TRUE;
360 			} else {
361 				/*
362 				 * We failed to get the token shared
363 				 */
364 				return FALSE;
365 			}
366 			/* retry */
367 		}
368 	}
369 }
370 
371 static __inline
372 int
373 _lwkt_trytokref_spin(lwkt_tokref_t ref, thread_t td, long mode)
374 {
375 	if (_lwkt_trytokref(ref, td, mode))
376 		return TRUE;
377 
378 	if (mode & TOK_EXCLUSIVE) {
379 		/*
380 		 * Contested exclusive token, use exponential backoff
381 		 * algorithm.
382 		 */
383 		long expbackoff;
384 		long loop;
385 
386 		expbackoff = 0;
387 		while (expbackoff < 6 + token_backoff_max) {
388 			expbackoff = (expbackoff + 1) * 3 / 2;
389 			if ((rdtsc() >> token_window_shift) % ncpus != mycpuid)  {
390 				for (loop = expbackoff; loop; --loop)
391 					cpu_pause();
392 			}
393 			if (_lwkt_trytokref(ref, td, mode))
394 				return TRUE;
395 		}
396 	} else {
397 		/*
398 		 * Contested shared token, use TSC windowing.  Note that
399 		 * exclusive tokens have priority over shared tokens only
400 		 * for the first token.
401 		 */
402 		if ((rdtsc() >> token_window_shift) % ncpus == mycpuid) {
403 			if (_lwkt_trytokref(ref, td, mode & ~TOK_EXCLREQ))
404 				return TRUE;
405 		} else {
406 			if (_lwkt_trytokref(ref, td, mode))
407 				return TRUE;
408 		}
409 
410 	}
411 	++mycpu->gd_cnt.v_lock_colls;
412 
413 	return FALSE;
414 }
415 
416 /*
417  * Release a token that we hold.
418  *
419  * Since tokens are polled, we don't have to deal with wakeups and releasing
420  * is really easy.
421  */
422 static __inline
423 void
424 _lwkt_reltokref(lwkt_tokref_t ref, thread_t td)
425 {
426 	lwkt_token_t tok;
427 	long count;
428 
429 	tok = ref->tr_tok;
430 	if (tok->t_ref == ref) {
431 		/*
432 		 * We are an exclusive holder.  We must clear tr_ref
433 		 * before we clear the TOK_EXCLUSIVE bit.  If we are
434 		 * unable to clear the bit we must restore
435 		 * tok->t_ref.
436 		 */
437 #if 0
438 		KKASSERT(count & TOK_EXCLUSIVE);
439 #endif
440 		tok->t_ref = NULL;
441 		atomic_clear_long(&tok->t_count, TOK_EXCLUSIVE);
442 	} else {
443 		/*
444 		 * We are a shared holder
445 		 */
446 		count = atomic_fetchadd_long(&tok->t_count, -TOK_INCR);
447 		KKASSERT(count & TOK_COUNTMASK);	/* count prior */
448 	}
449 }
450 
451 /*
452  * Obtain all the tokens required by the specified thread on the current
453  * cpu, return 0 on failure and non-zero on success.  If a failure occurs
454  * any partially acquired tokens will be released prior to return.
455  *
456  * lwkt_getalltokens is called by the LWKT scheduler to re-acquire all
457  * tokens that the thread had to release when it switched away.
458  *
459  * If spinning is non-zero this function acquires the tokens in a particular
460  * order to deal with potential deadlocks.  We simply use address order for
461  * the case.
462  *
463  * Called from a critical section.
464  */
465 int
466 lwkt_getalltokens(thread_t td, int spinning)
467 {
468 	lwkt_tokref_t scan;
469 	lwkt_token_t tok;
470 
471 	if (spinning)
472 		return(_lwkt_getalltokens_sorted(td));
473 
474 	/*
475 	 * Acquire tokens in forward order, assign or validate tok->t_ref.
476 	 */
477 	for (scan = &td->td_toks_base; scan < td->td_toks_stop; ++scan) {
478 		tok = scan->tr_tok;
479 		for (;;) {
480 			/*
481 			 * Only try really hard on the last token
482 			 */
483 			if (scan == td->td_toks_stop - 1) {
484 			    if (_lwkt_trytokref_spin(scan, td, scan->tr_count))
485 				    break;
486 			} else {
487 			    if (_lwkt_trytokref(scan, td, scan->tr_count))
488 				    break;
489 			}
490 
491 			/*
492 			 * Otherwise we failed to acquire all the tokens.
493 			 * Release whatever we did get.
494 			 */
495 			KASSERT(tok->t_desc,
496 				("token %p is not initialized", tok));
497 			td->td_gd->gd_cnt.v_lock_name[0] = 't';
498 			strncpy(td->td_gd->gd_cnt.v_lock_name + 1,
499 				tok->t_desc,
500 				sizeof(td->td_gd->gd_cnt.v_lock_name) - 2);
501 			if (lwkt_sched_debug > 0) {
502 				--lwkt_sched_debug;
503 				kprintf("toka %p %s %s\n",
504 					tok, tok->t_desc, td->td_comm);
505 			}
506 			td->td_wmesg = tok->t_desc;
507 			++tok->t_collisions;
508 			while (--scan >= &td->td_toks_base)
509 				_lwkt_reltokref(scan, td);
510 			return(FALSE);
511 		}
512 	}
513 	return (TRUE);
514 }
515 
516 /*
517  * Release all tokens owned by the specified thread on the current cpu.
518  *
519  * This code is really simple.  Even in cases where we own all the tokens
520  * note that t_ref may not match the scan for recursively held tokens which
521  * are held deeper in the stack, or for the case where a lwkt_getalltokens()
522  * failed.
523  *
524  * Tokens are released in reverse order to reduce chasing race failures.
525  *
526  * Called from a critical section.
527  */
528 void
529 lwkt_relalltokens(thread_t td)
530 {
531 	lwkt_tokref_t scan;
532 
533 	/*
534 	 * Weird order is to try to avoid a panic loop
535 	 */
536 	if (td->td_toks_have) {
537 		scan = td->td_toks_have;
538 		td->td_toks_have = NULL;
539 	} else {
540 		scan = td->td_toks_stop;
541 	}
542 	while (--scan >= &td->td_toks_base)
543 		_lwkt_reltokref(scan, td);
544 }
545 
546 /*
547  * This is the decontention version of lwkt_getalltokens().  The tokens are
548  * acquired in address-sorted order to deal with any deadlocks.  Ultimately
549  * token failures will spin into the scheduler and get here.
550  *
551  * Called from critical section
552  */
553 static
554 int
555 _lwkt_getalltokens_sorted(thread_t td)
556 {
557 	lwkt_tokref_t sort_array[LWKT_MAXTOKENS];
558 	lwkt_tokref_t scan;
559 	lwkt_token_t tok;
560 	int i;
561 	int j;
562 	int n;
563 
564 	/*
565 	 * Sort the token array.  Yah yah, I know this isn't fun.
566 	 *
567 	 * NOTE: Recursively acquired tokens are ordered the same as in the
568 	 *	 td_toks_array so we can always get the earliest one first.
569 	 *	 This is particularly important when a token is acquired
570 	 *	 exclusively multiple times, as only the first acquisition
571 	 *	 is treated as an exclusive token.
572 	 */
573 	i = 0;
574 	scan = &td->td_toks_base;
575 	while (scan < td->td_toks_stop) {
576 		for (j = 0; j < i; ++j) {
577 			if (scan->tr_tok < sort_array[j]->tr_tok)
578 				break;
579 		}
580 		if (j != i) {
581 			bcopy(sort_array + j, sort_array + j + 1,
582 			      (i - j) * sizeof(lwkt_tokref_t));
583 		}
584 		sort_array[j] = scan;
585 		++scan;
586 		++i;
587 	}
588 	n = i;
589 
590 	/*
591 	 * Acquire tokens in forward order, assign or validate tok->t_ref.
592 	 */
593 	for (i = 0; i < n; ++i) {
594 		scan = sort_array[i];
595 		tok = scan->tr_tok;
596 		for (;;) {
597 			/*
598 			 * Only try really hard on the last token
599 			 */
600 			if (scan == td->td_toks_stop - 1) {
601 			    if (_lwkt_trytokref_spin(scan, td, scan->tr_count))
602 				    break;
603 			} else {
604 			    if (_lwkt_trytokref(scan, td, scan->tr_count))
605 				    break;
606 			}
607 
608 			/*
609 			 * Otherwise we failed to acquire all the tokens.
610 			 * Release whatever we did get.
611 			 */
612 			td->td_gd->gd_cnt.v_lock_name[0] = 't';
613 			strncpy(td->td_gd->gd_cnt.v_lock_name + 1,
614 				tok->t_desc,
615 				sizeof(td->td_gd->gd_cnt.v_lock_name) - 2);
616 			if (lwkt_sched_debug > 0) {
617 				--lwkt_sched_debug;
618 				kprintf("tokb %p %s %s\n",
619 					tok, tok->t_desc, td->td_comm);
620 			}
621 			td->td_wmesg = tok->t_desc;
622 			++tok->t_collisions;
623 			while (--i >= 0) {
624 				scan = sort_array[i];
625 				_lwkt_reltokref(scan, td);
626 			}
627 			return(FALSE);
628 		}
629 	}
630 
631 	/*
632 	 * We were successful, there is no need for another core to signal
633 	 * us.
634 	 */
635 	return (TRUE);
636 }
637 
638 /*
639  * Get a serializing token.  This routine can block.
640  */
641 void
642 lwkt_gettoken(lwkt_token_t tok)
643 {
644 	thread_t td = curthread;
645 	lwkt_tokref_t ref;
646 
647 	ref = td->td_toks_stop;
648 	KKASSERT(ref < &td->td_toks_end);
649 	++td->td_toks_stop;
650 	cpu_ccfence();
651 	_lwkt_tokref_init(ref, tok, td, TOK_EXCLUSIVE|TOK_EXCLREQ);
652 
653 #ifdef DEBUG_LOCKS
654 	/*
655 	 * Taking an exclusive token after holding it shared will
656 	 * livelock. Scan for that case and assert.
657 	 */
658 	lwkt_tokref_t tk;
659 	int found = 0;
660 	for (tk = &td->td_toks_base; tk < ref; tk++) {
661 		if (tk->tr_tok != tok)
662 			continue;
663 
664 		found++;
665 		if (tk->tr_count & TOK_EXCLUSIVE)
666 			goto good;
667 	}
668 	/* We found only shared instances of this token if found >0 here */
669 	KASSERT((found == 0), ("Token %p s/x livelock", tok));
670 good:
671 #endif
672 
673 	if (_lwkt_trytokref_spin(ref, td, TOK_EXCLUSIVE|TOK_EXCLREQ))
674 		return;
675 
676 	/*
677 	 * Give up running if we can't acquire the token right now.
678 	 *
679 	 * Since the tokref is already active the scheduler now
680 	 * takes care of acquisition, so we need only call
681 	 * lwkt_switch().
682 	 *
683 	 * Since we failed this was not a recursive token so upon
684 	 * return tr_tok->t_ref should be assigned to this specific
685 	 * ref.
686 	 */
687 	td->td_wmesg = tok->t_desc;
688 	++tok->t_collisions;
689 	logtoken(fail, ref);
690 	td->td_toks_have = td->td_toks_stop - 1;
691 
692 	if (tokens_debug_output > 0) {
693 		--tokens_debug_output;
694 		spin_lock(&tok_debug_spin);
695 		kprintf("Excl Token thread %p %s %s\n",
696 			td, tok->t_desc, td->td_comm);
697 		print_backtrace(6);
698 		kprintf("\n");
699 		spin_unlock(&tok_debug_spin);
700 	}
701 
702 	atomic_set_int(&td->td_mpflags, TDF_MP_DIDYIELD);
703 	lwkt_switch();
704 	logtoken(succ, ref);
705 	KKASSERT(tok->t_ref == ref);
706 }
707 
708 /*
709  * Similar to gettoken but we acquire a shared token instead of an exclusive
710  * token.
711  */
712 void
713 lwkt_gettoken_shared(lwkt_token_t tok)
714 {
715 	thread_t td = curthread;
716 	lwkt_tokref_t ref;
717 
718 	ref = td->td_toks_stop;
719 	KKASSERT(ref < &td->td_toks_end);
720 	++td->td_toks_stop;
721 	cpu_ccfence();
722 	_lwkt_tokref_init(ref, tok, td, TOK_EXCLREQ);
723 
724 #ifdef DEBUG_LOCKS
725         /*
726          * Taking a pool token in shared mode is a bad idea; other
727          * addresses deeper in the call stack may hash to the same pool
728          * token and you may end up with an exclusive-shared livelock.
729          * Warn in this condition.
730          */
731         if ((tok >= &pool_tokens[0].token) &&
732             (tok < &pool_tokens[LWKT_NUM_POOL_TOKENS].token))
733                 kprintf("Warning! Taking pool token %p in shared mode\n", tok);
734 #endif
735 
736 
737 	if (_lwkt_trytokref_spin(ref, td, TOK_EXCLREQ))
738 		return;
739 
740 	/*
741 	 * Give up running if we can't acquire the token right now.
742 	 *
743 	 * Since the tokref is already active the scheduler now
744 	 * takes care of acquisition, so we need only call
745 	 * lwkt_switch().
746 	 *
747 	 * Since we failed this was not a recursive token so upon
748 	 * return tr_tok->t_ref should be assigned to this specific
749 	 * ref.
750 	 */
751 	td->td_wmesg = tok->t_desc;
752 	++tok->t_collisions;
753 	logtoken(fail, ref);
754 	td->td_toks_have = td->td_toks_stop - 1;
755 
756 	if (tokens_debug_output > 0) {
757 		--tokens_debug_output;
758 		spin_lock(&tok_debug_spin);
759 		kprintf("Shar Token thread %p %s %s\n",
760 			td, tok->t_desc, td->td_comm);
761 		print_backtrace(6);
762 		kprintf("\n");
763 		spin_unlock(&tok_debug_spin);
764 	}
765 
766 	atomic_set_int(&td->td_mpflags, TDF_MP_DIDYIELD);
767 	lwkt_switch();
768 	logtoken(succ, ref);
769 }
770 
771 /*
772  * Attempt to acquire a token, return TRUE on success, FALSE on failure.
773  *
774  * We setup the tokref in case we actually get the token (if we switch later
775  * it becomes mandatory so we set TOK_EXCLREQ), but we call trytokref without
776  * TOK_EXCLREQ in case we fail.
777  */
778 int
779 lwkt_trytoken(lwkt_token_t tok)
780 {
781 	thread_t td = curthread;
782 	lwkt_tokref_t ref;
783 
784 	ref = td->td_toks_stop;
785 	KKASSERT(ref < &td->td_toks_end);
786 	++td->td_toks_stop;
787 	cpu_ccfence();
788 	_lwkt_tokref_init(ref, tok, td, TOK_EXCLUSIVE|TOK_EXCLREQ);
789 
790 	if (_lwkt_trytokref(ref, td, TOK_EXCLUSIVE))
791 		return TRUE;
792 
793 	/*
794 	 * Failed, unpend the request
795 	 */
796 	cpu_ccfence();
797 	--td->td_toks_stop;
798 	++tok->t_collisions;
799 	return FALSE;
800 }
801 
802 lwkt_token_t
803 lwkt_getpooltoken(void *ptr)
804 {
805 	lwkt_token_t tok;
806 
807 	tok = _lwkt_token_pool_lookup(ptr);
808 	lwkt_gettoken(tok);
809 	return (tok);
810 }
811 
812 /*
813  * Release a serializing token.
814  *
815  * WARNING!  All tokens must be released in reverse order.  This will be
816  *	     asserted.
817  */
818 void
819 lwkt_reltoken(lwkt_token_t tok)
820 {
821 	thread_t td = curthread;
822 	lwkt_tokref_t ref;
823 
824 	/*
825 	 * Remove ref from thread token list and assert that it matches
826 	 * the token passed in.  Tokens must be released in reverse order.
827 	 */
828 	ref = td->td_toks_stop - 1;
829 	KKASSERT(ref >= &td->td_toks_base && ref->tr_tok == tok);
830 	_lwkt_reltokref(ref, td);
831 	cpu_sfence();
832 	td->td_toks_stop = ref;
833 }
834 
835 /*
836  * It is faster for users of lwkt_getpooltoken() to use the returned
837  * token and just call lwkt_reltoken(), but for convenience we provide
838  * this function which looks the token up based on the ident.
839  */
840 void
841 lwkt_relpooltoken(void *ptr)
842 {
843 	lwkt_token_t tok = _lwkt_token_pool_lookup(ptr);
844 	lwkt_reltoken(tok);
845 }
846 
847 /*
848  * Return a count of the number of token refs the thread has to the
849  * specified token, whether it currently owns the token or not.
850  */
851 int
852 lwkt_cnttoken(lwkt_token_t tok, thread_t td)
853 {
854 	lwkt_tokref_t scan;
855 	int count = 0;
856 
857 	for (scan = &td->td_toks_base; scan < td->td_toks_stop; ++scan) {
858 		if (scan->tr_tok == tok)
859 			++count;
860 	}
861 	return(count);
862 }
863 
864 /*
865  * Pool tokens are used to provide a type-stable serializing token
866  * pointer that does not race against disappearing data structures.
867  *
868  * This routine is called in early boot just after we setup the BSP's
869  * globaldata structure.
870  */
871 void
872 lwkt_token_pool_init(void)
873 {
874 	int i;
875 
876 	for (i = 0; i < LWKT_NUM_POOL_TOKENS; ++i)
877 		lwkt_token_init(&pool_tokens[i].token, "pool");
878 }
879 
880 lwkt_token_t
881 lwkt_token_pool_lookup(void *ptr)
882 {
883 	return (_lwkt_token_pool_lookup(ptr));
884 }
885 
886 /*
887  * Initialize a token.
888  */
889 void
890 lwkt_token_init(lwkt_token_t tok, const char *desc)
891 {
892 	tok->t_count = 0;
893 	tok->t_ref = NULL;
894 	tok->t_collisions = 0;
895 	tok->t_desc = desc;
896 }
897 
898 void
899 lwkt_token_uninit(lwkt_token_t tok)
900 {
901 	/* empty */
902 }
903 
904 /*
905  * Exchange the two most recent tokens on the tokref stack.  This allows
906  * you to release a token out of order.
907  *
908  * We have to be careful about the case where the top two tokens are
909  * the same token.  In this case tok->t_ref will point to the deeper
910  * ref and must remain pointing to the deeper ref.  If we were to swap
911  * it the first release would clear the token even though a second
912  * ref is still present.
913  *
914  * Only exclusively held tokens contain a reference to the tokref which
915  * has to be flipped along with the swap.
916  */
917 void
918 lwkt_token_swap(void)
919 {
920 	lwkt_tokref_t ref1, ref2;
921 	lwkt_token_t tok1, tok2;
922 	long count1, count2;
923 	thread_t td = curthread;
924 
925 	crit_enter();
926 
927 	ref1 = td->td_toks_stop - 1;
928 	ref2 = td->td_toks_stop - 2;
929 	KKASSERT(ref1 >= &td->td_toks_base);
930 	KKASSERT(ref2 >= &td->td_toks_base);
931 
932 	tok1 = ref1->tr_tok;
933 	tok2 = ref2->tr_tok;
934 	count1 = ref1->tr_count;
935 	count2 = ref2->tr_count;
936 
937 	if (tok1 != tok2) {
938 		ref1->tr_tok = tok2;
939 		ref1->tr_count = count2;
940 		ref2->tr_tok = tok1;
941 		ref2->tr_count = count1;
942 		if (tok1->t_ref == ref1)
943 			tok1->t_ref = ref2;
944 		if (tok2->t_ref == ref2)
945 			tok2->t_ref = ref1;
946 	}
947 
948 	crit_exit();
949 }
950 
951 #ifdef DDB
952 DB_SHOW_COMMAND(tokens, db_tok_all)
953 {
954 	struct lwkt_token *tok, **ptr;
955 	struct lwkt_token *toklist[16] = {
956 		&mp_token,
957 		&pmap_token,
958 		&dev_token,
959 		&vm_token,
960 		&vmspace_token,
961 		&kvm_token,
962 		&sigio_token,
963 		&tty_token,
964 		&vnode_token,
965 		NULL
966 	};
967 
968 	ptr = toklist;
969 	for (tok = *ptr; tok; tok = *(++ptr)) {
970 		db_printf("tok=%p tr_owner=%p t_colissions=%ld t_desc=%s\n", tok,
971 		    (tok->t_ref ? tok->t_ref->tr_owner : NULL),
972 		    tok->t_collisions, tok->t_desc);
973 	}
974 }
975 #endif /* DDB */
976