xref: /dragonfly/sys/kern/lwkt_token.c (revision 63e03116)
1 /*
2  * Copyright (c) 2003-2006,2009-2019 The DragonFly Project.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to The DragonFly Project
6  * by Matthew Dillon <dillon@backplane.com>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 /*
37  * lwkt_token - Implement soft token locks.
38  *
39  * Tokens are locks which serialize a thread only while the thread is
40  * running.  If the thread blocks all tokens are released, then reacquired
41  * when the thread resumes.
42  *
 * This implementation requires no critical sections or spin locks, but
 * does use atomic operations on the token count (atomic_fcmpset_long(),
 * atomic_fetchadd_long(), atomic_set_long(), ...).
45  *
46  * Tokens may be recursively acquired by the same thread.  However the
47  * caller must be sure to release such tokens in reverse order.
48  */
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/kernel.h>
52 #include <sys/proc.h>
53 #include <sys/rtprio.h>
54 #include <sys/queue.h>
55 #include <sys/sysctl.h>
56 #include <sys/ktr.h>
57 #include <sys/kthread.h>
58 #include <machine/cpu.h>
59 #include <sys/lock.h>
60 #include <sys/spinlock.h>
61 
62 #include <sys/thread2.h>
63 #include <sys/spinlock2.h>
64 #include <sys/mplock2.h>
65 
66 #include <vm/vm.h>
67 #include <vm/vm_param.h>
68 #include <vm/vm_kern.h>
69 #include <vm/vm_object.h>
70 #include <vm/vm_page.h>
71 #include <vm/vm_map.h>
72 #include <vm/vm_pager.h>
73 #include <vm/vm_extern.h>
74 #include <vm/vm_zone.h>
75 
76 #include <machine/stdarg.h>
77 #include <machine/smp.h>
78 
79 #include "opt_ddb.h"
80 #ifdef DDB
81 #include <ddb/ddb.h>
82 #endif
83 
/*
 * Countdown that rate-limits the "toka"/"tokb" diagnostics printed when
 * lwkt_getalltokens() fails to reacquire a thread's tokens.  Defined
 * outside of this file.
 */
extern int lwkt_sched_debug;

/*
 * Number of entries in the pool token table.  LWKT_POOL_MASK reduces the
 * address hash computed by _lwkt_token_pool_lookup() to a table index,
 * hence the power-of-2 requirement.
 */
#define LWKT_POOL_TOKENS	16384		/* must be power of 2 */
#define LWKT_POOL_MASK		(LWKT_POOL_TOKENS - 1)

/*
 * Wrapper that applies __cachealign to each pool token (presumably so
 * that adjacent pool tokens do not share a cache line -- confirm against
 * the __cachealign definition).
 */
struct lwkt_pool_token {
	struct lwkt_token	token;
} __cachealign;

/* Pool token table, indexed by _lwkt_token_pool_lookup(). */
static struct lwkt_pool_token	pool_tokens[LWKT_POOL_TOKENS];

/*
 * Held around the kprintf()/print_backtrace() debug output emitted by
 * lwkt_gettoken() and lwkt_gettoken_shared() when tokens_debug_output
 * is non-zero.
 */
static struct spinlock		tok_debug_spin =
    SPINLOCK_INITIALIZER(&tok_debug_spin, "tok_debug_spin");
96 
/*
 * KTR (kernel trace) event definitions for tokens.
 *
 * TOKEN_STRING is the common format for every token event and TOKEN_ARGS
 * is the matching argument list: the tokref, the token and the thread.
 */
#define TOKEN_STRING	"REF=%p TOK=%p TD=%p"
#define TOKEN_ARGS	lwkt_tokref_t ref, lwkt_token_t tok, struct thread *td
#define CONTENDED_STRING	TOKEN_STRING " (contention started)"
#define UNCONTENDED_STRING	TOKEN_STRING " (contention stopped)"
/* Default the token trace mask to KTR_ALL unless the build overrides it. */
#if !defined(KTR_TOKENS)
#define	KTR_TOKENS	KTR_ALL
#endif

/*
 * Only the "fail" and "succ" events are compiled in; they are logged by
 * lwkt_gettoken() and lwkt_gettoken_shared() around the blocking path.
 */
KTR_INFO_MASTER(tokens);
KTR_INFO(KTR_TOKENS, tokens, fail, 0, TOKEN_STRING, TOKEN_ARGS);
KTR_INFO(KTR_TOKENS, tokens, succ, 1, TOKEN_STRING, TOKEN_ARGS);
/*
 * Disabled events.  NOTE(review): contention_start and contention_stop
 * both use index 7; check whether that is intended before re-enabling.
 */
#if 0
KTR_INFO(KTR_TOKENS, tokens, release, 2, TOKEN_STRING, TOKEN_ARGS);
KTR_INFO(KTR_TOKENS, tokens, remote, 3, TOKEN_STRING, TOKEN_ARGS);
KTR_INFO(KTR_TOKENS, tokens, reqremote, 4, TOKEN_STRING, TOKEN_ARGS);
KTR_INFO(KTR_TOKENS, tokens, reqfail, 5, TOKEN_STRING, TOKEN_ARGS);
KTR_INFO(KTR_TOKENS, tokens, drain, 6, TOKEN_STRING, TOKEN_ARGS);
KTR_INFO(KTR_TOKENS, tokens, contention_start, 7, CONTENDED_STRING, TOKEN_ARGS);
KTR_INFO(KTR_TOKENS, tokens, contention_stop, 7, UNCONTENDED_STRING, TOKEN_ARGS);
#endif

/*
 * Log the token event tokens_<name> for the given tokref, passing the
 * tokref, its token and curthread.  The ref argument is expanded without
 * parentheses, so pass a plain identifier.
 */
#define logtoken(name, ref)						\
	KTR_LOG(tokens_ ## name, ref, ref->tr_tok, curthread)
120 
/*
 * Global tokens.  These replace the MP lock for major subsystem locking.
 * These tokens are initially used to lock up both global and individual
 * operations.
 *
 * Once individual structures get their own locks these tokens are used
 * only to protect global lists & other variables and to interlock
 * allocations and teardowns and such.
 *
 * The UP initializer causes token acquisition to also acquire the MP lock
 * for maximum compatibility.  The feature may be enabled and disabled at
 * any time, the MP state is copied to the tokref when the token is acquired
 * and will not race against sysctl changes.
 *
 * NOTE(review): every token below uses LWKT_TOKEN_INITIALIZER() and no
 * "UP initializer" is referenced in this file; the paragraph above may
 * be stale -- confirm against the header that defines the initializers.
 */
struct lwkt_token mp_token = LWKT_TOKEN_INITIALIZER(mp_token);
struct lwkt_token pmap_token = LWKT_TOKEN_INITIALIZER(pmap_token);
struct lwkt_token dev_token = LWKT_TOKEN_INITIALIZER(dev_token);
struct lwkt_token vm_token = LWKT_TOKEN_INITIALIZER(vm_token);
struct lwkt_token vmspace_token = LWKT_TOKEN_INITIALIZER(vmspace_token);
struct lwkt_token kvm_token = LWKT_TOKEN_INITIALIZER(kvm_token);
struct lwkt_token sigio_token = LWKT_TOKEN_INITIALIZER(sigio_token);
struct lwkt_token tty_token = LWKT_TOKEN_INITIALIZER(tty_token);
struct lwkt_token vnode_token = LWKT_TOKEN_INITIALIZER(vnode_token);
struct lwkt_token vga_token = LWKT_TOKEN_INITIALIZER(vga_token);
struct lwkt_token kbd_token = LWKT_TOKEN_INITIALIZER(kbd_token);
146 
/*
 * Exponential backoff (exclusive tokens) and TSC windowing (shared tokens)
 * parameters.  Remember that tokens backoff to the scheduler.  This is a bit
 * of trade-off.  Smaller values like 128 work better in some situations,
 * but under extreme loads larger values like 4096 seem to provide the most
 * determinism.
 *
 * token_backoff_max bounds the exclusive spin in _lwkt_trytokref_spin():
 * the loop runs while the backoff value is below 6 + token_backoff_max.
 *
 * token_window_shift is the right-shift applied to rdtsc() before taking
 * it modulo ncpus to decide which cpu's window is currently active.
 * NOTE(review): the sysctl is writable without range checking although
 * the value is used directly as a shift count.
 */
static int token_backoff_max __cachealign = 4096;
SYSCTL_INT(_lwkt, OID_AUTO, token_backoff_max, CTLFLAG_RW,
    &token_backoff_max, 0, "Tokens exponential backoff");
static int token_window_shift __cachealign = 8;
SYSCTL_INT(_lwkt, OID_AUTO, token_window_shift, CTLFLAG_RW,
    &token_window_shift, 0, "Tokens TSC windowing shift");

/*
 * The collision count is bumped every time the LWKT scheduler fails
 * to acquire needed tokens in addition to a normal lwkt_gettoken()
 * stall.
 *
 * Each sysctl below exports the t_collisions field of one global token.
 */
SYSCTL_LONG(_lwkt, OID_AUTO, mp_collisions, CTLFLAG_RW,
    &mp_token.t_collisions, 0, "Collision counter of mp_token");
SYSCTL_LONG(_lwkt, OID_AUTO, pmap_collisions, CTLFLAG_RW,
    &pmap_token.t_collisions, 0, "Collision counter of pmap_token");
SYSCTL_LONG(_lwkt, OID_AUTO, dev_collisions, CTLFLAG_RW,
    &dev_token.t_collisions, 0, "Collision counter of dev_token");
SYSCTL_LONG(_lwkt, OID_AUTO, vm_collisions, CTLFLAG_RW,
    &vm_token.t_collisions, 0, "Collision counter of vm_token");
SYSCTL_LONG(_lwkt, OID_AUTO, vmspace_collisions, CTLFLAG_RW,
    &vmspace_token.t_collisions, 0, "Collision counter of vmspace_token");
SYSCTL_LONG(_lwkt, OID_AUTO, kvm_collisions, CTLFLAG_RW,
    &kvm_token.t_collisions, 0, "Collision counter of kvm_token");
SYSCTL_LONG(_lwkt, OID_AUTO, sigio_collisions, CTLFLAG_RW,
    &sigio_token.t_collisions, 0, "Collision counter of sigio_token");
SYSCTL_LONG(_lwkt, OID_AUTO, tty_collisions, CTLFLAG_RW,
    &tty_token.t_collisions, 0, "Collision counter of tty_token");
SYSCTL_LONG(_lwkt, OID_AUTO, vnode_collisions, CTLFLAG_RW,
    &vnode_token.t_collisions, 0, "Collision counter of vnode_token");

/*
 * Countdown: while positive, each lwkt_gettoken()/lwkt_gettoken_shared()
 * call that has to block decrements it and prints the token plus a
 * backtrace.
 */
static int tokens_debug_output;
SYSCTL_INT(_lwkt, OID_AUTO, tokens_debug_output, CTLFLAG_RW,
    &tokens_debug_output, 0, "Generate stack trace N times");

/* Address-ordered variant of lwkt_getalltokens(), defined below. */
static int _lwkt_getalltokens_sorted(thread_t td);
190 
191 /*
192  * Acquire the initial mplock
193  *
194  * (low level boot only)
195  */
196 void
197 cpu_get_initial_mplock(void)
198 {
199 	KKASSERT(mp_token.t_ref == NULL);
200 	if (lwkt_trytoken(&mp_token) == FALSE)
201 		panic("cpu_get_initial_mplock");
202 }
203 
204 /*
205  * Return a pool token given an address.  Use a prime number to reduce
206  * overlaps.
207  */
208 #define POOL_HASH_PRIME1	66555444443333333ULL
209 #define POOL_HASH_PRIME2	989042931893ULL
210 
211 static __inline
212 lwkt_token_t
213 _lwkt_token_pool_lookup(void *ptr)
214 {
215 	uintptr_t hash1;
216 	uintptr_t hash2;
217 
218 	hash1 = (uintptr_t)ptr + ((uintptr_t)ptr >> 18);
219 	hash1 %= POOL_HASH_PRIME1;
220 	hash2 = ((uintptr_t)ptr >> 8) + ((uintptr_t)ptr >> 24);
221 	hash2 %= POOL_HASH_PRIME2;
222 	return (&pool_tokens[(hash1 ^ hash2) & LWKT_POOL_MASK].token);
223 }
224 
225 /*
226  * Initialize a tokref_t prior to making it visible in the thread's
227  * token array.
228  */
229 static __inline
230 void
231 _lwkt_tokref_init(lwkt_tokref_t ref, lwkt_token_t tok, thread_t td, long excl)
232 {
233 	ref->tr_tok = tok;
234 	ref->tr_count = excl;
235 	ref->tr_owner = td;
236 }
237 
/*
 * Attempt to acquire a shared or exclusive token.  Returns TRUE on success,
 * FALSE on failure.
 *
 * ref  - tokref (already placed in td's tokref array) naming the token
 * td   - thread the tokref belongs to
 * mode - TOK_EXCLUSIVE and/or TOK_EXCLREQ, see below
 *
 * If TOK_EXCLUSIVE is set in mode we are attempting to get an exclusive
 * token, otherwise we are attempting to get a shared token.
 *
 * If TOK_EXCLREQ is set in mode this is a blocking operation, otherwise
 * it is a non-blocking operation (for both exclusive and shared
 * acquisitions).
 *
 * This function itself never blocks; "blocking" only describes what the
 * caller intends to do after a FALSE return.
 */
static __inline
int
_lwkt_trytokref(lwkt_tokref_t ref, thread_t td, long mode)
{
	lwkt_token_t tok;
	lwkt_tokref_t oref;
	long count;

	tok = ref->tr_tok;
	/*
	 * A blocking request is only legal when the cpu's interrupt
	 * nesting level is zero, or when this cpu is the panic cpu.
	 */
	KASSERT(((mode & TOK_EXCLREQ) == 0 ||	/* non blocking */
		td->td_gd->gd_intr_nesting_level == 0 ||
		panic_cpu_gd == mycpu),
		("Attempt to acquire token %p not already "
		"held in hard code section", tok));

	if (mode & TOK_EXCLUSIVE) {
		/*
		 * Attempt to get an exclusive token
		 */
		count = tok->t_count;

		for (;;) {
			/*
			 * Snapshot t_ref once so both halves of the range
			 * comparison below see the same value.
			 */
			oref = tok->t_ref;	/* can be NULL */
			cpu_ccfence();
			if ((count & ~TOK_EXCLREQ) == 0) {
				/*
				 * It is possible to get the exclusive bit.
				 * We must clear TOK_EXCLREQ on successful
				 * acquisition.
				 *
				 * On failure the loop relies on the fcmpset
				 * having refreshed count with the current
				 * value before re-evaluating.
				 */
				if (atomic_fcmpset_long(&tok->t_count, &count,
							(count & ~TOK_EXCLREQ) |
							TOK_EXCLUSIVE)) {
					KKASSERT(tok->t_ref == NULL);
					tok->t_ref = ref;
					return TRUE;
				}
				/* retry */
			} else if ((count & TOK_EXCLUSIVE) &&
				   oref >= &td->td_toks_base &&
				   oref < td->td_toks_stop) {
				/*
				 * Our thread already holds the exclusive
				 * bit (the owning tokref lies inside our
				 * own tokref array), we treat this tokref
				 * as a shared token (sorta) to make the
				 * token release code easier.  Treating this
				 * as a shared token allows us to simply
				 * increment the count field.
				 *
				 * NOTE: oref cannot race above if it
				 *	 happens to be ours, so we're good.
				 *	 But we must still have a stable
				 *	 variable for both parts of the
				 *	 comparison.
				 *
				 * NOTE: Since we already have an exclusive
				 *	 lock and don't need to check EXCLREQ
				 *	 we can just use an atomic_add here
				 *
				 * TOK_EXCLUSIVE is cleared in the tokref so
				 * that later reacquisitions of this ref take
				 * the shared path.
				 */
				atomic_add_long(&tok->t_count, TOK_INCR);
				ref->tr_count &= ~TOK_EXCLUSIVE;
				return TRUE;
			} else if ((mode & TOK_EXCLREQ) &&
				   (count & TOK_EXCLREQ) == 0) {
				/*
				 * Unable to get the exclusive bit but being
				 * asked to set the exclusive-request bit.
				 * Since we are going to retry anyway just
				 * set the bit unconditionally.
				 */
				atomic_set_long(&tok->t_count, TOK_EXCLREQ);
				return FALSE;
			} else {
				/*
				 * Unable to get the exclusive bit and not
				 * being asked to set the exclusive-request
				 * (aka lwkt_trytoken()), or EXCLREQ was
				 * already set.
				 */
				cpu_pause();
				return FALSE;
			}
			/* retry */
		}
	} else {
		/*
		 * Attempt to get a shared token.  Note that TOK_EXCLREQ
		 * for shared tokens simply means the caller intends to
		 * block.  We never actually set the bit in tok->t_count.
		 *
		 * Due to the token's no-deadlock guarantee, and complications
		 * created by the sorted reacquisition code, we can only
		 * give exclusive requests priority over shared requests
		 * in situations where the thread holds only one token.
		 */
		count = tok->t_count;

		for (;;) {
			oref = tok->t_ref;	/* can be NULL */
			cpu_ccfence();
			/*
			 * Try the shared increment when the token is not
			 * held exclusively and either (a) none of the bits
			 * in mode (i.e. a pending TOK_EXCLREQ when the
			 * caller passed TOK_EXCLREQ) are set in count, or
			 * (b) this thread's tokref array holds something
			 * other than exactly one entry, in which case a
			 * pending exclusive request is not given priority.
			 */
			if ((count & (TOK_EXCLUSIVE|mode)) == 0 ||
			    ((count & TOK_EXCLUSIVE) == 0 &&
			    td->td_toks_stop != &td->td_toks_base + 1)
			) {
				/*
				 * It may be possible to get the token shared.
				 * Optimistically add our reference; if the
				 * prior value shows an exclusive holder raced
				 * in, back the reference out again and retry
				 * with the freshly observed count.
				 */
				if ((atomic_fetchadd_long(&tok->t_count, TOK_INCR) & TOK_EXCLUSIVE) == 0) {
					return TRUE;
				}
				count = atomic_fetchadd_long(&tok->t_count,
							     -TOK_INCR);
				count -= TOK_INCR;
				/* retry */
			} else if ((count & TOK_EXCLUSIVE) &&
				   oref >= &td->td_toks_base &&
				   oref < td->td_toks_stop) {
				/*
				 * We own the exclusive bit on the token so
				 * we can in fact also get it shared.
				 */
				atomic_add_long(&tok->t_count, TOK_INCR);
				return TRUE;
			} else {
				/*
				 * We failed to get the token shared
				 */
				return FALSE;
			}
			/* retry */
		}
	}
}
381 
/*
 * Try to acquire a token, spinning briefly on contention before giving
 * up.  Returns TRUE if the token was acquired, FALSE otherwise.  On
 * failure the per-cpu v_lock_colls counter is bumped.
 *
 * ref, td and mode are passed through to _lwkt_trytokref().
 */
static __inline
int
_lwkt_trytokref_spin(lwkt_tokref_t ref, thread_t td, long mode)
{
	/* Fast path: uncontended acquisition */
	if (_lwkt_trytokref(ref, td, mode))
		return TRUE;

	if (mode & TOK_EXCLUSIVE) {
		/*
		 * Contested exclusive token, use exponential backoff
		 * algorithm.
		 */
		long expbackoff;
		long loop;

		expbackoff = 0;
		while (expbackoff < 6 + token_backoff_max) {
			/* Grow the delay by roughly 1.5x per iteration */
			expbackoff = (expbackoff + 1) * 3 / 2;
			/*
			 * Skip the pause when the current TSC window
			 * selects this cpu, so it retries immediately.
			 */
			if ((rdtsc() >> token_window_shift) % ncpus != mycpuid)  {
				for (loop = expbackoff; loop; --loop)
					cpu_pause();
			}
			if (_lwkt_trytokref(ref, td, mode))
				return TRUE;
		}
	} else {
		/*
		 * Contested shared token, use TSC windowing.  Note that
		 * exclusive tokens have priority over shared tokens only
		 * for the first token.
		 *
		 * Within this cpu's TSC window the retry is made with
		 * TOK_EXCLREQ stripped from mode, so a pending exclusive
		 * request in t_count no longer blocks the shared attempt.
		 * Only a single retry is made either way.
		 */
		if ((rdtsc() >> token_window_shift) % ncpus == mycpuid) {
			if (_lwkt_trytokref(ref, td, mode & ~TOK_EXCLREQ))
				return TRUE;
		} else {
			if (_lwkt_trytokref(ref, td, mode))
				return TRUE;
		}

	}
	++mycpu->gd_cnt.v_lock_colls;

	return FALSE;
}
426 
/*
 * Release a token that we hold.
 *
 * Since tokens are polled, we don't have to deal with wakeups and releasing
 * is really easy.
 *
 * ref - tokref being released
 * td  - owning thread (not used by the current implementation)
 */
static __inline
void
_lwkt_reltokref(lwkt_tokref_t ref, thread_t td)
{
	lwkt_token_t tok;
	long count;

	tok = ref->tr_tok;
	if (tok->t_ref == ref) {
		/*
		 * We are the exclusive holder.  We must clear tok->t_ref
		 * before we clear the TOK_EXCLUSIVE bit, since another
		 * thread may take the token as soon as the bit is clear.
		 * The bit is cleared unconditionally with an atomic op.
		 */
#if 0
		KKASSERT(count & TOK_EXCLUSIVE);
#endif
		tok->t_ref = NULL;
		atomic_clear_long(&tok->t_count, TOK_EXCLUSIVE);
	} else {
		/*
		 * We are a shared holder (this includes recursive
		 * acquisitions of a token we also hold exclusively).
		 * Drop one reference and check that the count was
		 * non-zero beforehand.
		 */
		count = atomic_fetchadd_long(&tok->t_count, -TOK_INCR);
		KKASSERT(count & TOK_COUNTMASK);	/* count prior */
	}
}
461 
/*
 * Obtain all the tokens required by the specified thread on the current
 * cpu, return 0 on failure and non-zero on success.  If a failure occurs
 * any partially acquired tokens will be released prior to return.
 *
 * lwkt_getalltokens is called by the LWKT scheduler to re-acquire all
 * tokens that the thread had to release when it switched away.
 *
 * If spinning is non-zero this function acquires the tokens in a particular
 * order to deal with potential deadlocks.  We simply use address order for
 * the case.
 *
 * On failure the name of the contended token is recorded in the per-cpu
 * v_lock_name (prefixed with 't') and in td->td_wmesg, and the token's
 * collision counter is incremented.
 *
 * Called from a critical section.
 */
int
lwkt_getalltokens(thread_t td, int spinning)
{
	lwkt_tokref_t scan;
	lwkt_token_t tok;

	if (spinning)
		return(_lwkt_getalltokens_sorted(td));

	/*
	 * Acquire tokens in forward order, assign or validate tok->t_ref.
	 */
	for (scan = &td->td_toks_base; scan < td->td_toks_stop; ++scan) {
		tok = scan->tr_tok;
		/*
		 * This inner loop never iterates; it only provides a
		 * break target for the success case.
		 */
		for (;;) {
			/*
			 * Only try really hard on the last token
			 */
			if (scan == td->td_toks_stop - 1) {
			    if (_lwkt_trytokref_spin(scan, td, scan->tr_count))
				    break;
			} else {
			    if (_lwkt_trytokref(scan, td, scan->tr_count))
				    break;
			}

			/*
			 * Otherwise we failed to acquire all the tokens.
			 * Release whatever we did get.
			 */
			KASSERT(tok->t_desc,
				("token %p is not initialized", tok));
			/*
			 * At most sizeof - 2 bytes are copied starting at
			 * index 1, so the final byte of v_lock_name is
			 * never written here (presumably it stays NUL).
			 */
			td->td_gd->gd_cnt.v_lock_name[0] = 't';
			strncpy(td->td_gd->gd_cnt.v_lock_name + 1,
				tok->t_desc,
				sizeof(td->td_gd->gd_cnt.v_lock_name) - 2);
			if (lwkt_sched_debug > 0) {
				--lwkt_sched_debug;
				kprintf("toka %p %s %s\n",
					tok, tok->t_desc, td->td_comm);
			}
			td->td_wmesg = tok->t_desc;
			++tok->t_collisions;
			/* Undo the acquisitions made so far, newest first */
			while (--scan >= &td->td_toks_base)
				_lwkt_reltokref(scan, td);
			return(FALSE);
		}
	}
	return (TRUE);
}
526 
527 /*
528  * Release all tokens owned by the specified thread on the current cpu.
529  *
530  * This code is really simple.  Even in cases where we own all the tokens
531  * note that t_ref may not match the scan for recursively held tokens which
532  * are held deeper in the stack, or for the case where a lwkt_getalltokens()
533  * failed.
534  *
535  * Tokens are released in reverse order to reduce chasing race failures.
536  *
537  * Called from a critical section.
538  */
539 void
540 lwkt_relalltokens(thread_t td)
541 {
542 	lwkt_tokref_t scan;
543 
544 	/*
545 	 * Weird order is to try to avoid a panic loop
546 	 */
547 	if (td->td_toks_have) {
548 		scan = td->td_toks_have;
549 		td->td_toks_have = NULL;
550 	} else {
551 		scan = td->td_toks_stop;
552 	}
553 	while (--scan >= &td->td_toks_base)
554 		_lwkt_reltokref(scan, td);
555 }
556 
557 /*
558  * This is the decontention version of lwkt_getalltokens().  The tokens are
559  * acquired in address-sorted order to deal with any deadlocks.  Ultimately
560  * token failures will spin into the scheduler and get here.
561  *
562  * Called from critical section
563  */
564 static
565 int
566 _lwkt_getalltokens_sorted(thread_t td)
567 {
568 	lwkt_tokref_t sort_array[LWKT_MAXTOKENS];
569 	lwkt_tokref_t scan;
570 	lwkt_token_t tok;
571 	int i;
572 	int j;
573 	int n;
574 
575 	/*
576 	 * Sort the token array.  Yah yah, I know this isn't fun.
577 	 *
578 	 * NOTE: Recursively acquired tokens are ordered the same as in the
579 	 *	 td_toks_array so we can always get the earliest one first.
580 	 *	 This is particularly important when a token is acquired
581 	 *	 exclusively multiple times, as only the first acquisition
582 	 *	 is treated as an exclusive token.
583 	 */
584 	i = 0;
585 	scan = &td->td_toks_base;
586 	while (scan < td->td_toks_stop) {
587 		for (j = 0; j < i; ++j) {
588 			if (scan->tr_tok < sort_array[j]->tr_tok)
589 				break;
590 		}
591 		if (j != i) {
592 			bcopy(sort_array + j, sort_array + j + 1,
593 			      (i - j) * sizeof(lwkt_tokref_t));
594 		}
595 		sort_array[j] = scan;
596 		++scan;
597 		++i;
598 	}
599 	n = i;
600 
601 	/*
602 	 * Acquire tokens in forward order, assign or validate tok->t_ref.
603 	 */
604 	for (i = 0; i < n; ++i) {
605 		scan = sort_array[i];
606 		tok = scan->tr_tok;
607 		for (;;) {
608 			/*
609 			 * Only try really hard on the last token
610 			 */
611 			if (scan == td->td_toks_stop - 1) {
612 			    if (_lwkt_trytokref_spin(scan, td, scan->tr_count))
613 				    break;
614 			} else {
615 			    if (_lwkt_trytokref(scan, td, scan->tr_count))
616 				    break;
617 			}
618 
619 			/*
620 			 * Otherwise we failed to acquire all the tokens.
621 			 * Release whatever we did get.
622 			 */
623 			td->td_gd->gd_cnt.v_lock_name[0] = 't';
624 			strncpy(td->td_gd->gd_cnt.v_lock_name + 1,
625 				tok->t_desc,
626 				sizeof(td->td_gd->gd_cnt.v_lock_name) - 2);
627 			if (lwkt_sched_debug > 0) {
628 				--lwkt_sched_debug;
629 				kprintf("tokb %p %s %s\n",
630 					tok, tok->t_desc, td->td_comm);
631 			}
632 			td->td_wmesg = tok->t_desc;
633 			++tok->t_collisions;
634 			while (--i >= 0) {
635 				scan = sort_array[i];
636 				_lwkt_reltokref(scan, td);
637 			}
638 			return(FALSE);
639 		}
640 	}
641 
642 	/*
643 	 * We were successful, there is no need for another core to signal
644 	 * us.
645 	 */
646 	return (TRUE);
647 }
648 
/*
 * Get a serializing token.  This routine can block.
 *
 * A new tokref is pushed onto curthread's tokref stack and the token is
 * requested exclusively.  If it cannot be obtained after a short spin
 * the thread switches away; by the time lwkt_switch() returns the token
 * is held by this tokref (asserted below).
 */
void
lwkt_gettoken(lwkt_token_t tok)
{
	thread_t td = curthread;
	lwkt_tokref_t ref;

	/*
	 * Reserve the next tokref slot first, then fill it in.  The
	 * compiler fence keeps the two steps in program order.
	 */
	ref = td->td_toks_stop;
	KKASSERT(ref < &td->td_toks_end);
	++td->td_toks_stop;
	cpu_ccfence();
	_lwkt_tokref_init(ref, tok, td, TOK_EXCLUSIVE|TOK_EXCLREQ);

#ifdef DEBUG_LOCKS
	/*
	 * Taking an exclusive token after holding it shared will
	 * livelock. Scan for that case and assert.
	 */
	lwkt_tokref_t tk;
	int found = 0;
	for (tk = &td->td_toks_base; tk < ref; tk++) {
		if (tk->tr_tok != tok)
			continue;

		found++;
		/* An earlier exclusive hold makes this a legal recursion */
		if (tk->tr_count & TOK_EXCLUSIVE)
			goto good;
	}
	/* We found only shared instances of this token if found >0 here */
	KASSERT((found == 0), ("Token %p s/x livelock", tok));
good:
#endif

	if (_lwkt_trytokref_spin(ref, td, TOK_EXCLUSIVE|TOK_EXCLREQ))
		return;

	/*
	 * Give up running if we can't acquire the token right now.
	 *
	 * Since the tokref is already active the scheduler now
	 * takes care of acquisition, so we need only call
	 * lwkt_switch().
	 *
	 * Since we failed this was not a recursive token so upon
	 * return tr_tok->t_ref should be assigned to this specific
	 * ref.
	 *
	 * td_toks_have marks the new ref as not held, so that
	 * lwkt_relalltokens() releases only the tokrefs below it.
	 */
	td->td_wmesg = tok->t_desc;
	++tok->t_collisions;
	logtoken(fail, ref);
	td->td_toks_have = td->td_toks_stop - 1;

	/* Optional rate-limited debug trace of the blocking acquisition */
	if (tokens_debug_output > 0) {
		--tokens_debug_output;
		spin_lock(&tok_debug_spin);
		kprintf("Excl Token %p thread %p %s %s\n",
			tok, td, tok->t_desc, td->td_comm);
		print_backtrace(6);
		kprintf("\n");
		spin_unlock(&tok_debug_spin);
	}

	atomic_set_int(&td->td_mpflags, TDF_MP_DIDYIELD);
	lwkt_switch();
	logtoken(succ, ref);
	KKASSERT(tok->t_ref == ref);
}
718 
/*
 * Similar to gettoken but we acquire a shared token instead of an exclusive
 * token.  This routine can block.
 */
void
lwkt_gettoken_shared(lwkt_token_t tok)
{
	thread_t td = curthread;
	lwkt_tokref_t ref;

	/*
	 * Reserve the next tokref slot first, then fill it in.  The
	 * tokref carries only TOK_EXCLREQ (no TOK_EXCLUSIVE), which for
	 * shared requests just means the caller intends to block.
	 */
	ref = td->td_toks_stop;
	KKASSERT(ref < &td->td_toks_end);
	++td->td_toks_stop;
	cpu_ccfence();
	_lwkt_tokref_init(ref, tok, td, TOK_EXCLREQ);

#ifdef DEBUG_LOCKS
	/*
	 * Taking a pool token in shared mode is a bad idea; other
	 * addresses deeper in the call stack may hash to the same pool
	 * token and you may end up with an exclusive-shared livelock.
	 * Warn in this condition.
	 */
	if ((tok >= &pool_tokens[0].token) &&
	    (tok < &pool_tokens[LWKT_POOL_TOKENS].token))
		kprintf("Warning! Taking pool token %p in shared mode\n", tok);
#endif


	if (_lwkt_trytokref_spin(ref, td, TOK_EXCLREQ))
		return;

	/*
	 * Give up running if we can't acquire the token right now.
	 *
	 * Since the tokref is already active the scheduler now
	 * takes care of acquisition, so we need only call
	 * lwkt_switch().
	 *
	 * Unlike the exclusive case, a shared acquisition does not
	 * assign tok->t_ref, so there is nothing to assert on return.
	 *
	 * td_toks_have marks the new ref as not held, so that
	 * lwkt_relalltokens() releases only the tokrefs below it.
	 */
	td->td_wmesg = tok->t_desc;
	++tok->t_collisions;
	logtoken(fail, ref);
	td->td_toks_have = td->td_toks_stop - 1;

	/* Optional rate-limited debug trace of the blocking acquisition */
	if (tokens_debug_output > 0) {
		--tokens_debug_output;
		spin_lock(&tok_debug_spin);
		kprintf("Shar Token %p thread %p %s %s\n",
			tok, td, tok->t_desc, td->td_comm);
		print_backtrace(6);
		kprintf("\n");
		spin_unlock(&tok_debug_spin);
	}

	atomic_set_int(&td->td_mpflags, TDF_MP_DIDYIELD);
	lwkt_switch();
	logtoken(succ, ref);
}
781 
/*
 * Attempt to acquire a token exclusively without blocking, return TRUE
 * on success, FALSE on failure.
 *
 * We setup the tokref in case we actually get the token (if we switch later
 * it becomes mandatory so we set TOK_EXCLREQ), but we call trytokref without
 * TOK_EXCLREQ in case we fail.
 *
 * On failure the tokref is popped again and the token's collision
 * counter is incremented.
 */
int
lwkt_trytoken(lwkt_token_t tok)
{
	thread_t td = curthread;
	lwkt_tokref_t ref;

	/* Reserve the next tokref slot first, then fill it in */
	ref = td->td_toks_stop;
	KKASSERT(ref < &td->td_toks_end);
	++td->td_toks_stop;
	cpu_ccfence();
	_lwkt_tokref_init(ref, tok, td, TOK_EXCLUSIVE|TOK_EXCLREQ);

	/* Single non-spinning attempt; TOK_EXCLREQ is deliberately omitted */
	if (_lwkt_trytokref(ref, td, TOK_EXCLUSIVE))
		return TRUE;

	/*
	 * Failed, unpend the request
	 */
	cpu_ccfence();
	--td->td_toks_stop;
	++tok->t_collisions;
	return FALSE;
}
812 
813 lwkt_token_t
814 lwkt_getpooltoken(void *ptr)
815 {
816 	lwkt_token_t tok;
817 
818 	tok = _lwkt_token_pool_lookup(ptr);
819 	lwkt_gettoken(tok);
820 	return (tok);
821 }
822 
823 /*
824  * Release a serializing token.
825  *
826  * WARNING!  All tokens must be released in reverse order.  This will be
827  *	     asserted.
828  */
829 void
830 lwkt_reltoken(lwkt_token_t tok)
831 {
832 	thread_t td = curthread;
833 	lwkt_tokref_t ref;
834 
835 	/*
836 	 * Remove ref from thread token list and assert that it matches
837 	 * the token passed in.  Tokens must be released in reverse order.
838 	 */
839 	ref = td->td_toks_stop - 1;
840 	if (__predict_false(ref < &td->td_toks_base || ref->tr_tok != tok)) {
841 		kprintf("LWKT_RELTOKEN ASSERTION td %p tok %p ref %p/%p\n",
842 			td, tok, &td->td_toks_base, ref);
843 		kprintf("REF CONTENT: tok=%p count=%016lx owner=%p\n",
844 			ref->tr_tok, ref->tr_count, ref->tr_owner);
845 		if (ref < &td->td_toks_base) {
846 			kprintf("lwkt_reltoken: no tokens to release\n");
847 		} else {
848 			kprintf("lwkt_reltoken: release wants %s and got %s\n",
849 				tok->t_desc, ref->tr_tok->t_desc);
850 		}
851 		panic("lwkt_reltoken: illegal release");
852 	}
853 	_lwkt_reltokref(ref, td);
854 	cpu_sfence();
855 	td->td_toks_stop = ref;
856 }
857 
/*
 * Convenience wrapper: release the pool token that ptr hashes to.
 *
 * Callers of lwkt_getpooltoken() are better off keeping the returned
 * token and calling lwkt_reltoken() directly, which avoids repeating
 * the hash lookup.
 */
void
lwkt_relpooltoken(void *ptr)
{
	lwkt_reltoken(_lwkt_token_pool_lookup(ptr));
}
869 
870 /*
871  * Return a count of the number of token refs the thread has to the
872  * specified token, whether it currently owns the token or not.
873  */
874 int
875 lwkt_cnttoken(lwkt_token_t tok, thread_t td)
876 {
877 	lwkt_tokref_t scan;
878 	int count = 0;
879 
880 	for (scan = &td->td_toks_base; scan < td->td_toks_stop; ++scan) {
881 		if (scan->tr_tok == tok)
882 			++count;
883 	}
884 	return(count);
885 }
886 
887 /*
888  * Pool tokens are used to provide a type-stable serializing token
889  * pointer that does not race against disappearing data structures.
890  *
891  * This routine is called in early boot just after we setup the BSP's
892  * globaldata structure.
893  */
894 void
895 lwkt_token_pool_init(void)
896 {
897 	int i;
898 
899 	for (i = 0; i < LWKT_POOL_TOKENS; ++i)
900 		lwkt_token_init(&pool_tokens[i].token, "pool");
901 }
902 
903 lwkt_token_t
904 lwkt_token_pool_lookup(void *ptr)
905 {
906 	return (_lwkt_token_pool_lookup(ptr));
907 }
908 
909 /*
910  * Initialize a token.
911  */
912 void
913 lwkt_token_init(lwkt_token_t tok, const char *desc)
914 {
915 	tok->t_count = 0;
916 	tok->t_ref = NULL;
917 	tok->t_collisions = 0;
918 	tok->t_desc = desc;
919 }
920 
921 void
922 lwkt_token_uninit(lwkt_token_t tok)
923 {
924 	/* empty */
925 }
926 
/*
 * Exchange the two most recent tokens on the tokref stack.  This allows
 * you to release a token out of order.
 *
 * The calling thread must have at least two tokrefs on its stack
 * (asserted).
 *
 * We have to be careful about the case where the top two tokens are
 * the same token.  In this case tok->t_ref will point to the deeper
 * ref and must remain pointing to the deeper ref.  If we were to swap
 * it the first release would clear the token even though a second
 * ref is still present.
 *
 * Only exclusively held tokens contain a reference to the tokref which
 * has to be flipped along with the swap.
 */
void
lwkt_token_swap(void)
{
	lwkt_tokref_t ref1, ref2;
	lwkt_token_t tok1, tok2;
	long count1, count2;
	thread_t td = curthread;

	crit_enter();

	/* ref1 is the top of the stack, ref2 the entry just below it */
	ref1 = td->td_toks_stop - 1;
	ref2 = td->td_toks_stop - 2;
	KKASSERT(ref1 >= &td->td_toks_base);
	KKASSERT(ref2 >= &td->td_toks_base);

	tok1 = ref1->tr_tok;
	tok2 = ref2->tr_tok;
	count1 = ref1->tr_count;
	count2 = ref2->tr_count;

	if (tok1 != tok2) {
		/*
		 * Swap the token pointer and mode/count of the two
		 * slots.  tr_owner is left alone.
		 */
		ref1->tr_tok = tok2;
		ref1->tr_count = count2;
		ref2->tr_tok = tok1;
		ref2->tr_count = count1;
		/*
		 * If either token's exclusive back-pointer referenced
		 * its old slot, redirect it to the slot the token now
		 * occupies.
		 */
		if (tok1->t_ref == ref1)
			tok1->t_ref = ref2;
		if (tok2->t_ref == ref2)
			tok2->t_ref = ref1;
	}

	crit_exit();
}
973 
#ifdef DDB
/*
 * DDB command registered via DB_SHOW_COMMAND(tokens, ...): print the
 * address, exclusive owner thread (NULL if none), collision count and
 * description of every global token defined in this file.
 */
DB_SHOW_COMMAND(tokens, db_tok_all)
{
	struct lwkt_token *tok, **ptr;
	/* NULL-terminated; must list every global token defined above */
	struct lwkt_token *toklist[16] = {
		&mp_token,
		&pmap_token,
		&dev_token,
		&vm_token,
		&vmspace_token,
		&kvm_token,
		&sigio_token,
		&tty_token,
		&vnode_token,
		&vga_token,
		&kbd_token,
		NULL
	};

	ptr = toklist;
	for (tok = *ptr; tok; tok = *(++ptr)) {
		db_printf("tok=%p tr_owner=%p t_collisions=%ld t_desc=%s\n", tok,
		    (tok->t_ref ? tok->t_ref->tr_owner : NULL),
		    tok->t_collisions, tok->t_desc);
	}
}
#endif /* DDB */
999