xref: /dragonfly/sys/kern/lwkt_token.c (revision 50b09fda)
1 /*
2  * Copyright (c) 2003,2004,2009 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * lwkt_token - Implement soft token locks.
37  *
38  * Tokens are locks which serialize a thread only while the thread is
39  * running.  If the thread blocks, all tokens are released and then
40  * reacquired when the thread resumes.
41  *
42  * This implementation requires no critical sections or spin locks, but
43  * does rely on atomic compare-and-set and fetch-and-add operations.
44  *
45  * Tokens may be recursively acquired by the same thread.  However, the
46  * caller must be sure to release such tokens in reverse order.
47  */
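
/*
 * Typical usage sketch (illustrative only; 'foo_token' and the structure
 * it protects are hypothetical, not part of this file):
 *
 *	static struct lwkt_token foo_token = LWKT_TOKEN_INITIALIZER(foo_token);
 *
 *	lwkt_gettoken(&foo_token);
 *	...scan or modify the protected structure; blocking here releases
 *	   all held tokens, which are reacquired before the thread resumes...
 *	lwkt_reltoken(&foo_token);
 *
 * Shared holds are obtained with lwkt_gettoken_shared(), and per-address
 * pool tokens are available via lwkt_getpooltoken()/lwkt_relpooltoken().
 */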
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/kernel.h>
51 #include <sys/proc.h>
52 #include <sys/rtprio.h>
53 #include <sys/queue.h>
54 #include <sys/sysctl.h>
55 #include <sys/ktr.h>
56 #include <sys/kthread.h>
57 #include <machine/cpu.h>
58 #include <sys/lock.h>
59 #include <sys/spinlock.h>
60 
61 #include <sys/thread2.h>
62 #include <sys/spinlock2.h>
63 #include <sys/mplock2.h>
64 
65 #include <vm/vm.h>
66 #include <vm/vm_param.h>
67 #include <vm/vm_kern.h>
68 #include <vm/vm_object.h>
69 #include <vm/vm_page.h>
70 #include <vm/vm_map.h>
71 #include <vm/vm_pager.h>
72 #include <vm/vm_extern.h>
73 #include <vm/vm_zone.h>
74 
75 #include <machine/stdarg.h>
76 #include <machine/smp.h>
77 
78 #include "opt_ddb.h"
79 #ifdef DDB
80 #include <ddb/ddb.h>
81 #endif
82 
83 extern int lwkt_sched_debug;
84 
85 #define LWKT_POOL_TOKENS	16384		/* must be power of 2 */
86 #define LWKT_POOL_MASK		(LWKT_POOL_TOKENS - 1)
87 
88 struct lwkt_pool_token {
89 	struct lwkt_token	token;
90 } __cachealign;
91 
92 static struct lwkt_pool_token	pool_tokens[LWKT_POOL_TOKENS];
93 struct spinlock tok_debug_spin = SPINLOCK_INITIALIZER(&tok_debug_spin,
94 						      "tok_debug_spin");
95 
96 #define TOKEN_STRING	"REF=%p TOK=%p TD=%p"
97 #define TOKEN_ARGS	lwkt_tokref_t ref, lwkt_token_t tok, struct thread *td
98 #define CONTENDED_STRING	TOKEN_STRING " (contention started)"
99 #define UNCONTENDED_STRING	TOKEN_STRING " (contention stopped)"
100 #if !defined(KTR_TOKENS)
101 #define	KTR_TOKENS	KTR_ALL
102 #endif
103 
104 KTR_INFO_MASTER(tokens);
105 KTR_INFO(KTR_TOKENS, tokens, fail, 0, TOKEN_STRING, TOKEN_ARGS);
106 KTR_INFO(KTR_TOKENS, tokens, succ, 1, TOKEN_STRING, TOKEN_ARGS);
107 #if 0
108 KTR_INFO(KTR_TOKENS, tokens, release, 2, TOKEN_STRING, TOKEN_ARGS);
109 KTR_INFO(KTR_TOKENS, tokens, remote, 3, TOKEN_STRING, TOKEN_ARGS);
110 KTR_INFO(KTR_TOKENS, tokens, reqremote, 4, TOKEN_STRING, TOKEN_ARGS);
111 KTR_INFO(KTR_TOKENS, tokens, reqfail, 5, TOKEN_STRING, TOKEN_ARGS);
112 KTR_INFO(KTR_TOKENS, tokens, drain, 6, TOKEN_STRING, TOKEN_ARGS);
113 KTR_INFO(KTR_TOKENS, tokens, contention_start, 7, CONTENDED_STRING, TOKEN_ARGS);
114 KTR_INFO(KTR_TOKENS, tokens, contention_stop, 7, UNCONTENDED_STRING, TOKEN_ARGS);
115 #endif
116 
117 #define logtoken(name, ref)						\
118 	KTR_LOG(tokens_ ## name, ref, ref->tr_tok, curthread)
119 
120 /*
121  * Global tokens.  These replace the MP lock for major subsystem locking.
122  * These tokens are initially used to lock both global and individual
123  * operations.
124  *
125  * Once individual structures get their own locks these tokens are used
126  * only to protect global lists & other variables and to interlock
127  * allocations and teardowns and such.
128  *
129  * The UP initializer causes token acquisition to also acquire the MP lock
130  * for maximum compatibility.  The feature may be enabled and disabled at
131  * any time; the MP state is copied to the tokref when the token is acquired
132  * and will not race against sysctl changes.
133  */
134 struct lwkt_token mp_token = LWKT_TOKEN_INITIALIZER(mp_token);
135 struct lwkt_token pmap_token = LWKT_TOKEN_INITIALIZER(pmap_token);
136 struct lwkt_token dev_token = LWKT_TOKEN_INITIALIZER(dev_token);
137 struct lwkt_token vm_token = LWKT_TOKEN_INITIALIZER(vm_token);
138 struct lwkt_token vmspace_token = LWKT_TOKEN_INITIALIZER(vmspace_token);
139 struct lwkt_token kvm_token = LWKT_TOKEN_INITIALIZER(kvm_token);
140 struct lwkt_token sigio_token = LWKT_TOKEN_INITIALIZER(sigio_token);
141 struct lwkt_token tty_token = LWKT_TOKEN_INITIALIZER(tty_token);
142 struct lwkt_token vnode_token = LWKT_TOKEN_INITIALIZER(vnode_token);
143 struct lwkt_token vga_token = LWKT_TOKEN_INITIALIZER(vga_token);
144 struct lwkt_token kbd_token = LWKT_TOKEN_INITIALIZER(kbd_token);
145 
146 /*
147  * Exponential backoff (exclusive tokens) and TSC windowing (shared tokens)
148  * parameters.  Remember that tokens back off to the scheduler, so large
149  * values are not recommended.
150  */
151 static int token_backoff_max __cachealign = 4096;
152 SYSCTL_INT(_lwkt, OID_AUTO, token_backoff_max, CTLFLAG_RW,
153     &token_backoff_max, 0, "Tokens exponential backoff");
154 static int token_window_shift __cachealign = 8;
155 SYSCTL_INT(_lwkt, OID_AUTO, token_window_shift, CTLFLAG_RW,
156     &token_window_shift, 0, "Tokens TSC windowing shift");
157 
158 /*
159  * The collision count is bumped on a normal lwkt_gettoken() stall and,
160  * in addition, every time the LWKT scheduler fails to acquire needed
161  * tokens.
162  */
163 SYSCTL_LONG(_lwkt, OID_AUTO, mp_collisions, CTLFLAG_RW,
164     &mp_token.t_collisions, 0, "Collision counter of mp_token");
165 SYSCTL_LONG(_lwkt, OID_AUTO, pmap_collisions, CTLFLAG_RW,
166     &pmap_token.t_collisions, 0, "Collision counter of pmap_token");
167 SYSCTL_LONG(_lwkt, OID_AUTO, dev_collisions, CTLFLAG_RW,
168     &dev_token.t_collisions, 0, "Collision counter of dev_token");
169 SYSCTL_LONG(_lwkt, OID_AUTO, vm_collisions, CTLFLAG_RW,
170     &vm_token.t_collisions, 0, "Collision counter of vm_token");
171 SYSCTL_LONG(_lwkt, OID_AUTO, vmspace_collisions, CTLFLAG_RW,
172     &vmspace_token.t_collisions, 0, "Collision counter of vmspace_token");
173 SYSCTL_LONG(_lwkt, OID_AUTO, kvm_collisions, CTLFLAG_RW,
174     &kvm_token.t_collisions, 0, "Collision counter of kvm_token");
175 SYSCTL_LONG(_lwkt, OID_AUTO, sigio_collisions, CTLFLAG_RW,
176     &sigio_token.t_collisions, 0, "Collision counter of sigio_token");
177 SYSCTL_LONG(_lwkt, OID_AUTO, tty_collisions, CTLFLAG_RW,
178     &tty_token.t_collisions, 0, "Collision counter of tty_token");
179 SYSCTL_LONG(_lwkt, OID_AUTO, vnode_collisions, CTLFLAG_RW,
180     &vnode_token.t_collisions, 0, "Collision counter of vnode_token");
181 
182 int tokens_debug_output;
183 SYSCTL_INT(_lwkt, OID_AUTO, tokens_debug_output, CTLFLAG_RW,
184     &tokens_debug_output, 0, "Generate stack trace N times");
185 
186 static int _lwkt_getalltokens_sorted(thread_t td);
187 
188 /*
189  * Acquire the initial mplock
190  *
191  * (low level boot only)
192  */
193 void
194 cpu_get_initial_mplock(void)
195 {
196 	KKASSERT(mp_token.t_ref == NULL);
197 	if (lwkt_trytoken(&mp_token) == FALSE)
198 		panic("cpu_get_initial_mplock");
199 }
200 
201 /*
202  * Return a pool token given an address.  Use prime numbers to reduce
203  * overlaps.
204  */
205 #define POOL_HASH_PRIME1	66555444443333333ULL
206 #define POOL_HASH_PRIME2	989042931893ULL
207 
208 static __inline
209 lwkt_token_t
210 _lwkt_token_pool_lookup(void *ptr)
211 {
212 	uintptr_t hash1;
213 	uintptr_t hash2;
214 
215 	hash1 = (uintptr_t)ptr + ((uintptr_t)ptr >> 18);
216 	hash1 %= POOL_HASH_PRIME1;
217 	hash2 = ((uintptr_t)ptr >> 8) + ((uintptr_t)ptr >> 24);
218 	hash2 %= POOL_HASH_PRIME2;
219 	return (&pool_tokens[(hash1 ^ hash2) & LWKT_POOL_MASK].token);
220 }
221 
222 /*
223  * Initialize a tokref_t prior to making it visible in the thread's
224  * token array.
225  */
226 static __inline
227 void
228 _lwkt_tokref_init(lwkt_tokref_t ref, lwkt_token_t tok, thread_t td, long excl)
229 {
230 	ref->tr_tok = tok;
231 	ref->tr_count = excl;
232 	ref->tr_owner = td;
233 }
234 
235 /*
236  * Attempt to acquire a shared or exclusive token.  Returns TRUE on success,
237  * FALSE on failure.
238  *
239  * If TOK_EXCLUSIVE is set in mode we are attempting to get an exclusive
240  * token, otherwise we are attempting to get a shared token.
241  *
242  * If TOK_EXCLREQ is set in mode this is a blocking operation, otherwise
243  * it is a non-blocking operation (for both exclusive and shared acquisitions).
244  */
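/*
 * NOTE: tok->t_count packs everything needed for the fast path: the
 *	 TOK_EXCLUSIVE and TOK_EXCLREQ flag bits occupy the low bits and
 *	 the remainder (masked by TOK_COUNTMASK) is a shared-hold count
 *	 maintained in units of TOK_INCR.  An exclusive grant is only
 *	 possible when (count & ~TOK_EXCLREQ) == 0, i.e. there is neither
 *	 an exclusive holder nor any shared holders.
 */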
245 static __inline
246 int
247 _lwkt_trytokref(lwkt_tokref_t ref, thread_t td, long mode)
248 {
249 	lwkt_token_t tok;
250 	lwkt_tokref_t oref;
251 	long count;
252 
253 	tok = ref->tr_tok;
254 	KASSERT(((mode & TOK_EXCLREQ) == 0 ||	/* non blocking */
255 		td->td_gd->gd_intr_nesting_level == 0 ||
256 		panic_cpu_gd == mycpu),
257 		("Attempt to acquire token %p not already "
258 		"held in hard code section", tok));
259 
260 	if (mode & TOK_EXCLUSIVE) {
261 		/*
262 		 * Attempt to get an exclusive token
263 		 */
264 		count = tok->t_count;
265 
266 		for (;;) {
267 			oref = tok->t_ref;	/* can be NULL */
268 			cpu_ccfence();
269 			if ((count & ~TOK_EXCLREQ) == 0) {
270 				/*
271 				 * It is possible to get the exclusive bit.
272 				 * We must clear TOK_EXCLREQ on successful
273 				 * acquisition.
274 				 */
275 				if (atomic_fcmpset_long(&tok->t_count, &count,
276 							(count & ~TOK_EXCLREQ) |
277 							TOK_EXCLUSIVE)) {
278 					KKASSERT(tok->t_ref == NULL);
279 					tok->t_ref = ref;
280 					return TRUE;
281 				}
282 				/* retry */
283 			} else if ((count & TOK_EXCLUSIVE) &&
284 				   oref >= &td->td_toks_base &&
285 				   oref < td->td_toks_stop) {
286 				/*
287 				 * Our thread already holds the exclusive
288 				 * bit, we treat this tokref as a shared
289 				 * token (sorta) to make the token release
290 				 * code easier.  Treating this as a shared
291 				 * token allows us to simply increment the
292 				 * count field.
293 				 *
294 				 * NOTE: oref cannot race above if it
295 				 *	 happens to be ours, so we're good.
296 				 *	 But we must still have a stable
297 				 *	 variable for both parts of the
298 				 *	 comparison.
299 				 *
300 				 * NOTE: Since we already have an exclusive
301 				 *	 lock and don't need to check EXCLREQ
302 				 *	 we can just use an atomic_add here
303 				 */
304 				atomic_add_long(&tok->t_count, TOK_INCR);
305 				ref->tr_count &= ~TOK_EXCLUSIVE;
306 				return TRUE;
307 			} else if ((mode & TOK_EXCLREQ) &&
308 				   (count & TOK_EXCLREQ) == 0) {
309 				/*
310 				 * Unable to get the exclusive bit but being
311 				 * asked to set the exclusive-request bit.
312 				 * Since we are going to retry anyway just
313 				 * set the bit unconditionally.
314 				 */
315 				atomic_set_long(&tok->t_count, TOK_EXCLREQ);
316 				return FALSE;
317 			} else {
318 				/*
319 				 * Unable to get the exclusive bit and not
320 				 * being asked to set the exclusive-request
321 				 * (aka lwkt_trytoken()), or EXCLREQ was
322 				 * already set.
323 				 */
324 				cpu_pause();
325 				return FALSE;
326 			}
327 			/* retry */
328 		}
329 	} else {
330 		/*
331 		 * Attempt to get a shared token.  Note that TOK_EXCLREQ
332 		 * for shared tokens simply means the caller intends to
333 		 * block.  We never actually set the bit in tok->t_count.
334 		 *
335 		 * Due to the token's no-deadlock guarantee, and complications
336 		 * created by the sorted reacquisition code, we can only
337 		 * give exclusive requests priority over shared requests
338 		 * in situations where the thread holds only one token.
339 		 */
340 		count = tok->t_count;
341 
342 		for (;;) {
343 			oref = tok->t_ref;	/* can be NULL */
344 			cpu_ccfence();
345 			if ((count & (TOK_EXCLUSIVE|mode)) == 0 ||
346 			    ((count & TOK_EXCLUSIVE) == 0 &&
347 			    td->td_toks_stop != &td->td_toks_base + 1)
348 			) {
349 				/*
350 				 * It may be possible to get the token shared.
351 				 */
352 				if ((atomic_fetchadd_long(&tok->t_count, TOK_INCR) & TOK_EXCLUSIVE) == 0) {
353 					return TRUE;
354 				}
355 				count = atomic_fetchadd_long(&tok->t_count,
356 							     -TOK_INCR);
357 				count -= TOK_INCR;
358 				/* retry */
359 			} else if ((count & TOK_EXCLUSIVE) &&
360 				   oref >= &td->td_toks_base &&
361 				   oref < td->td_toks_stop) {
362 				/*
363 				 * We own the exclusive bit on the token so
364 				 * we can in fact also get it shared.
365 				 */
366 				atomic_add_long(&tok->t_count, TOK_INCR);
367 				return TRUE;
368 			} else {
369 				/*
370 				 * We failed to get the token shared
371 				 */
372 				return FALSE;
373 			}
374 			/* retry */
375 		}
376 	}
377 }
378 
379 static __inline
380 int
381 _lwkt_trytokref_spin(lwkt_tokref_t ref, thread_t td, long mode)
382 {
383 	if (_lwkt_trytokref(ref, td, mode))
384 		return TRUE;
385 
386 	if (mode & TOK_EXCLUSIVE) {
387 		/*
388 		 * Contested exclusive token, use exponential backoff
389 		 * algorithm.
390 		 */
391 		long expbackoff;
392 		long loop;
393 
394 		expbackoff = 0;
395 		while (expbackoff < 6 + token_backoff_max) {
396 			expbackoff = (expbackoff + 1) * 3 / 2;
397 			if ((rdtsc() >> token_window_shift) % ncpus != mycpuid)  {
398 				for (loop = expbackoff; loop; --loop)
399 					cpu_pause();
400 			}
401 			if (_lwkt_trytokref(ref, td, mode))
402 				return TRUE;
403 		}
404 	} else {
405 		/*
406 		 * Contested shared token, use TSC windowing.  Note that
407 		 * exclusive tokens have priority over shared tokens only
408 		 * for the first token.
409 		 */
410 		if ((rdtsc() >> token_window_shift) % ncpus == mycpuid) {
411 			if (_lwkt_trytokref(ref, td, mode & ~TOK_EXCLREQ))
412 				return TRUE;
413 		} else {
414 			if (_lwkt_trytokref(ref, td, mode))
415 				return TRUE;
416 		}
417 
418 	}
419 	++mycpu->gd_cnt.v_lock_colls;
420 
421 	return FALSE;
422 }
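
/*
 * Backoff arithmetic sketch: in the exclusive path above, expbackoff
 * follows the integer sequence 1, 3, 6, 10, 16, 25, 39, 60, 91, 138, ...
 * (each step is (n + 1) * 3 / 2) and the retry loop ends once the value
 * reaches 6 + token_backoff_max (4102 with the default setting of 4096).
 * The cpu_pause() burst is skipped whenever the TSC window selects this
 * cpu; the same window test determines which cpu may ignore a pending
 * exclusive request when retrying a contested shared token.
 */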
423 
424 /*
425  * Release a token that we hold.
426  *
427  * Since tokens are polled, we don't have to deal with wakeups and releasing
428  * is really easy.
429  */
430 static __inline
431 void
432 _lwkt_reltokref(lwkt_tokref_t ref, thread_t td)
433 {
434 	lwkt_token_t tok;
435 	long count;
436 
437 	tok = ref->tr_tok;
438 	if (tok->t_ref == ref) {
439 		/*
440 		 * We are an exclusive holder.  We must clear tr_ref
441 		 * before we clear the TOK_EXCLUSIVE bit.  If we are
442 		 * unable to clear the bit we must restore
443 		 * tok->t_ref.
444 		 */
445 #if 0
446 		KKASSERT(tok->t_count & TOK_EXCLUSIVE);
447 #endif
448 		tok->t_ref = NULL;
449 		atomic_clear_long(&tok->t_count, TOK_EXCLUSIVE);
450 	} else {
451 		/*
452 		 * We are a shared holder
453 		 */
454 		count = atomic_fetchadd_long(&tok->t_count, -TOK_INCR);
455 		KKASSERT(count & TOK_COUNTMASK);	/* count prior */
456 	}
457 }
458 
459 /*
460  * Obtain all the tokens required by the specified thread on the current
461  * cpu, return 0 on failure and non-zero on success.  If a failure occurs
462  * any partially acquired tokens will be released prior to return.
463  *
464  * lwkt_getalltokens is called by the LWKT scheduler to re-acquire all
465  * tokens that the thread had to release when it switched away.
466  *
467  * If spinning is non-zero this function acquires the tokens in a particular
468  * order to deal with potential deadlocks.  We simply use address order
469  * in that case.
470  *
471  * Called from a critical section.
472  */
473 int
474 lwkt_getalltokens(thread_t td, int spinning)
475 {
476 	lwkt_tokref_t scan;
477 	lwkt_token_t tok;
478 
479 	if (spinning)
480 		return(_lwkt_getalltokens_sorted(td));
481 
482 	/*
483 	 * Acquire tokens in forward order, assign or validate tok->t_ref.
484 	 */
485 	for (scan = &td->td_toks_base; scan < td->td_toks_stop; ++scan) {
486 		tok = scan->tr_tok;
487 		for (;;) {
488 			/*
489 			 * Only try really hard on the last token
490 			 */
491 			if (scan == td->td_toks_stop - 1) {
492 			    if (_lwkt_trytokref_spin(scan, td, scan->tr_count))
493 				    break;
494 			} else {
495 			    if (_lwkt_trytokref(scan, td, scan->tr_count))
496 				    break;
497 			}
498 
499 			/*
500 			 * Otherwise we failed to acquire all the tokens.
501 			 * Release whatever we did get.
502 			 */
503 			KASSERT(tok->t_desc,
504 				("token %p is not initialized", tok));
505 			td->td_gd->gd_cnt.v_lock_name[0] = 't';
506 			strncpy(td->td_gd->gd_cnt.v_lock_name + 1,
507 				tok->t_desc,
508 				sizeof(td->td_gd->gd_cnt.v_lock_name) - 2);
509 			if (lwkt_sched_debug > 0) {
510 				--lwkt_sched_debug;
511 				kprintf("toka %p %s %s\n",
512 					tok, tok->t_desc, td->td_comm);
513 			}
514 			td->td_wmesg = tok->t_desc;
515 			++tok->t_collisions;
516 			while (--scan >= &td->td_toks_base)
517 				_lwkt_reltokref(scan, td);
518 			return(FALSE);
519 		}
520 	}
521 	return (TRUE);
522 }
523 
524 /*
525  * Release all tokens owned by the specified thread on the current cpu.
526  *
527  * This code is really simple.  Even in cases where we own all the tokens
528  * note that t_ref may not match the scan for recursively held tokens which
529  * are held deeper in the stack, or for the case where a lwkt_getalltokens()
530  * failed.
531  *
532  * Tokens are released in reverse order to reduce chasing race failures.
533  *
534  * Called from a critical section.
535  */
536 void
537 lwkt_relalltokens(thread_t td)
538 {
539 	lwkt_tokref_t scan;
540 
541 	/*
542 	 * Weird order is to try to avoid a panic loop
543 	 */
544 	if (td->td_toks_have) {
545 		scan = td->td_toks_have;
546 		td->td_toks_have = NULL;
547 	} else {
548 		scan = td->td_toks_stop;
549 	}
550 	while (--scan >= &td->td_toks_base)
551 		_lwkt_reltokref(scan, td);
552 }
553 
554 /*
555  * This is the decontention version of lwkt_getalltokens().  The tokens are
556  * acquired in address-sorted order to deal with any deadlocks.  Ultimately
557  * token failures will spin into the scheduler and get here.
558  *
559  * Called from critical section
560  */
561 static
562 int
563 _lwkt_getalltokens_sorted(thread_t td)
564 {
565 	lwkt_tokref_t sort_array[LWKT_MAXTOKENS];
566 	lwkt_tokref_t scan;
567 	lwkt_token_t tok;
568 	int i;
569 	int j;
570 	int n;
571 
572 	/*
573 	 * Sort the token array.  Yah yah, I know this isn't fun.
574 	 *
575 	 * NOTE: Recursively acquired tokens are ordered the same as in the
576 	 *	 td_toks_array so we can always get the earliest one first.
577 	 *	 This is particularly important when a token is acquired
578 	 *	 exclusively multiple times, as only the first acquisition
579 	 *	 is treated as an exclusive token.
580 	 */
581 	i = 0;
582 	scan = &td->td_toks_base;
583 	while (scan < td->td_toks_stop) {
584 		for (j = 0; j < i; ++j) {
585 			if (scan->tr_tok < sort_array[j]->tr_tok)
586 				break;
587 		}
588 		if (j != i) {
589 			bcopy(sort_array + j, sort_array + j + 1,
590 			      (i - j) * sizeof(lwkt_tokref_t));
591 		}
592 		sort_array[j] = scan;
593 		++scan;
594 		++i;
595 	}
596 	n = i;
597 
598 	/*
599 	 * Acquire tokens in forward order, assign or validate tok->t_ref.
600 	 */
601 	for (i = 0; i < n; ++i) {
602 		scan = sort_array[i];
603 		tok = scan->tr_tok;
604 		for (;;) {
605 			/*
606 			 * Only try really hard on the last token
607 			 */
608 			if (scan == td->td_toks_stop - 1) {
609 			    if (_lwkt_trytokref_spin(scan, td, scan->tr_count))
610 				    break;
611 			} else {
612 			    if (_lwkt_trytokref(scan, td, scan->tr_count))
613 				    break;
614 			}
615 
616 			/*
617 			 * Otherwise we failed to acquire all the tokens.
618 			 * Release whatever we did get.
619 			 */
620 			td->td_gd->gd_cnt.v_lock_name[0] = 't';
621 			strncpy(td->td_gd->gd_cnt.v_lock_name + 1,
622 				tok->t_desc,
623 				sizeof(td->td_gd->gd_cnt.v_lock_name) - 2);
624 			if (lwkt_sched_debug > 0) {
625 				--lwkt_sched_debug;
626 				kprintf("tokb %p %s %s\n",
627 					tok, tok->t_desc, td->td_comm);
628 			}
629 			td->td_wmesg = tok->t_desc;
630 			++tok->t_collisions;
631 			while (--i >= 0) {
632 				scan = sort_array[i];
633 				_lwkt_reltokref(scan, td);
634 			}
635 			return(FALSE);
636 		}
637 	}
638 
639 	/*
640  * We were successful; there is no need for another core to signal
641 	 * us.
642 	 */
643 	return (TRUE);
644 }
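
/*
 * Example of why the address sort matters: if one spinning thread needs
 * tokens (T1, T2) while another needs (T2, T1), unsorted reacquisition
 * can livelock with each thread repeatedly winning its first token and
 * then failing on the second.  Sorting by token address makes both
 * threads contend in the same order, so whichever thread obtains the
 * lowest-address token can make forward progress.
 */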
645 
646 /*
647  * Get a serializing token.  This routine can block.
648  */
649 void
650 lwkt_gettoken(lwkt_token_t tok)
651 {
652 	thread_t td = curthread;
653 	lwkt_tokref_t ref;
654 
655 	ref = td->td_toks_stop;
656 	KKASSERT(ref < &td->td_toks_end);
657 	++td->td_toks_stop;
658 	cpu_ccfence();
659 	_lwkt_tokref_init(ref, tok, td, TOK_EXCLUSIVE|TOK_EXCLREQ);
660 
661 #ifdef DEBUG_LOCKS
662 	/*
663 	 * Taking an exclusive token after holding it shared will
664 	 * livelock. Scan for that case and assert.
665 	 */
666 	lwkt_tokref_t tk;
667 	int found = 0;
668 	for (tk = &td->td_toks_base; tk < ref; tk++) {
669 		if (tk->tr_tok != tok)
670 			continue;
671 
672 		found++;
673 		if (tk->tr_count & TOK_EXCLUSIVE)
674 			goto good;
675 	}
676 	/* We found only shared instances of this token if found >0 here */
677 	KASSERT((found == 0), ("Token %p s/x livelock", tok));
678 good:
679 #endif
680 
681 	if (_lwkt_trytokref_spin(ref, td, TOK_EXCLUSIVE|TOK_EXCLREQ))
682 		return;
683 
684 	/*
685 	 * Give up running if we can't acquire the token right now.
686 	 *
687 	 * Since the tokref is already active the scheduler now
688 	 * takes care of acquisition, so we need only call
689 	 * lwkt_switch().
690 	 *
691  * Since we failed, this was not a recursive token, so upon
692 	 * return tr_tok->t_ref should be assigned to this specific
693 	 * ref.
694 	 */
695 	td->td_wmesg = tok->t_desc;
696 	++tok->t_collisions;
697 	logtoken(fail, ref);
698 	td->td_toks_have = td->td_toks_stop - 1;
699 
700 	if (tokens_debug_output > 0) {
701 		--tokens_debug_output;
702 		spin_lock(&tok_debug_spin);
703 		kprintf("Excl Token %p thread %p %s %s\n",
704 			tok, td, tok->t_desc, td->td_comm);
705 		print_backtrace(6);
706 		kprintf("\n");
707 		spin_unlock(&tok_debug_spin);
708 	}
709 
710 	atomic_set_int(&td->td_mpflags, TDF_MP_DIDYIELD);
711 	lwkt_switch();
712 	logtoken(succ, ref);
713 	KKASSERT(tok->t_ref == ref);
714 }
715 
716 /*
717  * Similar to gettoken but we acquire a shared token instead of an exclusive
718  * token.
719  */
720 void
721 lwkt_gettoken_shared(lwkt_token_t tok)
722 {
723 	thread_t td = curthread;
724 	lwkt_tokref_t ref;
725 
726 	ref = td->td_toks_stop;
727 	KKASSERT(ref < &td->td_toks_end);
728 	++td->td_toks_stop;
729 	cpu_ccfence();
730 	_lwkt_tokref_init(ref, tok, td, TOK_EXCLREQ);
731 
732 #ifdef DEBUG_LOCKS
733 	/*
734 	 * Taking a pool token in shared mode is a bad idea; other
735 	 * addresses deeper in the call stack may hash to the same pool
736 	 * token and you may end up with an exclusive-shared livelock.
737 	 * Warn in this condition.
738 	 */
739 	if ((tok >= &pool_tokens[0].token) &&
740 	    (tok < &pool_tokens[LWKT_POOL_TOKENS].token))
741 		kprintf("Warning! Taking pool token %p in shared mode\n", tok);
742 #endif
743 
744 
745 	if (_lwkt_trytokref_spin(ref, td, TOK_EXCLREQ))
746 		return;
747 
748 	/*
749 	 * Give up running if we can't acquire the token right now.
750 	 *
751 	 * Since the tokref is already active the scheduler now
752 	 * takes care of acquisition, so we need only call
753 	 * lwkt_switch().
754 	 *
755  * Since we failed, this was not a recursive token, so upon
756 	 * return tr_tok->t_ref should be assigned to this specific
757 	 * ref.
758 	 */
759 	td->td_wmesg = tok->t_desc;
760 	++tok->t_collisions;
761 	logtoken(fail, ref);
762 	td->td_toks_have = td->td_toks_stop - 1;
763 
764 	if (tokens_debug_output > 0) {
765 		--tokens_debug_output;
766 		spin_lock(&tok_debug_spin);
767 		kprintf("Shar Token %p thread %p %s %s\n",
768 			tok, td, tok->t_desc, td->td_comm);
769 		print_backtrace(6);
770 		kprintf("\n");
771 		spin_unlock(&tok_debug_spin);
772 	}
773 
774 	atomic_set_int(&td->td_mpflags, TDF_MP_DIDYIELD);
775 	lwkt_switch();
776 	logtoken(succ, ref);
777 }
778 
779 /*
780  * Attempt to acquire a token, return TRUE on success, FALSE on failure.
781  *
782  * We set up the tokref in case we actually get the token (if we switch later
783  * it becomes mandatory so we set TOK_EXCLREQ), but we call trytokref without
784  * TOK_EXCLREQ in case we fail.
785  */
786 int
787 lwkt_trytoken(lwkt_token_t tok)
788 {
789 	thread_t td = curthread;
790 	lwkt_tokref_t ref;
791 
792 	ref = td->td_toks_stop;
793 	KKASSERT(ref < &td->td_toks_end);
794 	++td->td_toks_stop;
795 	cpu_ccfence();
796 	_lwkt_tokref_init(ref, tok, td, TOK_EXCLUSIVE|TOK_EXCLREQ);
797 
798 	if (_lwkt_trytokref(ref, td, TOK_EXCLUSIVE))
799 		return TRUE;
800 
801 	/*
802 	 * Failed, unpend the request
803 	 */
804 	cpu_ccfence();
805 	--td->td_toks_stop;
806 	++tok->t_collisions;
807 	return FALSE;
808 }
809 
810 lwkt_token_t
811 lwkt_getpooltoken(void *ptr)
812 {
813 	lwkt_token_t tok;
814 
815 	tok = _lwkt_token_pool_lookup(ptr);
816 	lwkt_gettoken(tok);
817 	return (tok);
818 }
819 
820 /*
821  * Release a serializing token.
822  *
823  * WARNING!  All tokens must be released in reverse order.  This will be
824  *	     asserted.
825  */
826 void
827 lwkt_reltoken(lwkt_token_t tok)
828 {
829 	thread_t td = curthread;
830 	lwkt_tokref_t ref;
831 
832 	/*
833 	 * Remove ref from thread token list and assert that it matches
834 	 * the token passed in.  Tokens must be released in reverse order.
835 	 */
836 	ref = td->td_toks_stop - 1;
837 	KKASSERT(ref >= &td->td_toks_base && ref->tr_tok == tok);
838 	_lwkt_reltokref(ref, td);
839 	cpu_sfence();
840 	td->td_toks_stop = ref;
841 }
842 
843 /*
844  * It is faster for users of lwkt_getpooltoken() to use the returned
845  * token and just call lwkt_reltoken(), but for convenience we provide
846  * this function, which looks the token up again from the address.
847  */
848 void
849 lwkt_relpooltoken(void *ptr)
850 {
851 	lwkt_token_t tok = _lwkt_token_pool_lookup(ptr);
852 	lwkt_reltoken(tok);
853 }
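
/*
 * Illustrative pool token usage (sketch only; 'obj' is a hypothetical
 * structure pointer):
 *
 *	lwkt_token_t tok;
 *
 *	tok = lwkt_getpooltoken(obj);
 *	...serialize against other users of obj...
 *	lwkt_reltoken(tok);		(or lwkt_relpooltoken(obj))
 *
 * The same address always hashes to the same pool token, but unrelated
 * addresses may share a token, so holders can occasionally contend on
 * structures they do not care about.
 */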
854 
855 /*
856  * Return a count of the number of token refs the thread has to the
857  * specified token, whether it currently owns the token or not.
858  */
859 int
860 lwkt_cnttoken(lwkt_token_t tok, thread_t td)
861 {
862 	lwkt_tokref_t scan;
863 	int count = 0;
864 
865 	for (scan = &td->td_toks_base; scan < td->td_toks_stop; ++scan) {
866 		if (scan->tr_tok == tok)
867 			++count;
868 	}
869 	return(count);
870 }
871 
872 /*
873  * Pool tokens are used to provide a type-stable serializing token
874  * pointer that does not race against disappearing data structures.
875  *
876  * This routine is called in early boot just after we set up the BSP's
877  * globaldata structure.
878  */
879 void
880 lwkt_token_pool_init(void)
881 {
882 	int i;
883 
884 	for (i = 0; i < LWKT_POOL_TOKENS; ++i)
885 		lwkt_token_init(&pool_tokens[i].token, "pool");
886 }
887 
888 lwkt_token_t
889 lwkt_token_pool_lookup(void *ptr)
890 {
891 	return (_lwkt_token_pool_lookup(ptr));
892 }
893 
894 /*
895  * Initialize a token.
896  */
897 void
898 lwkt_token_init(lwkt_token_t tok, const char *desc)
899 {
900 	tok->t_count = 0;
901 	tok->t_ref = NULL;
902 	tok->t_collisions = 0;
903 	tok->t_desc = desc;
904 }
905 
906 void
907 lwkt_token_uninit(lwkt_token_t tok)
908 {
909 	/* empty */
910 }
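
/*
 * A token embedded in a dynamically allocated structure is typically
 * set up with lwkt_token_init() before first use and torn down with
 * lwkt_token_uninit(), e.g. (hypothetical structure and field names):
 *
 *	struct foo *fo;
 *
 *	lwkt_token_init(&fo->fo_token, "footk");
 *	...
 *	lwkt_token_uninit(&fo->fo_token);
 *
 * Statically allocated tokens can instead use LWKT_TOKEN_INITIALIZER(),
 * as the global tokens near the top of this file do.
 */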
911 
912 /*
913  * Exchange the two most recent tokens on the tokref stack.  This allows
914  * you to release a token out of order.
915  *
916  * We have to be careful about the case where the top two tokens are
917  * the same token.  In this case tok->t_ref will point to the deeper
918  * ref and must remain pointing to the deeper ref.  If we were to swap
919  * it the first release would clear the token even though a second
920  * ref is still present.
921  *
922  * Only exclusively held tokens contain a reference to the tokref which
923  * has to be flipped along with the swap.
924  */
925 void
926 lwkt_token_swap(void)
927 {
928 	lwkt_tokref_t ref1, ref2;
929 	lwkt_token_t tok1, tok2;
930 	long count1, count2;
931 	thread_t td = curthread;
932 
933 	crit_enter();
934 
935 	ref1 = td->td_toks_stop - 1;
936 	ref2 = td->td_toks_stop - 2;
937 	KKASSERT(ref1 >= &td->td_toks_base);
938 	KKASSERT(ref2 >= &td->td_toks_base);
939 
940 	tok1 = ref1->tr_tok;
941 	tok2 = ref2->tr_tok;
942 	count1 = ref1->tr_count;
943 	count2 = ref2->tr_count;
944 
945 	if (tok1 != tok2) {
946 		ref1->tr_tok = tok2;
947 		ref1->tr_count = count2;
948 		ref2->tr_tok = tok1;
949 		ref2->tr_count = count1;
950 		if (tok1->t_ref == ref1)
951 			tok1->t_ref = ref2;
952 		if (tok2->t_ref == ref2)
953 			tok2->t_ref = ref1;
954 	}
955 
956 	crit_exit();
957 }
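
/*
 * Illustrative out-of-order release using lwkt_token_swap() (sketch only;
 * tokA and tokB are hypothetical):
 *
 *	lwkt_gettoken(&tokA);
 *	lwkt_gettoken(&tokB);
 *	...
 *	lwkt_token_swap();		(tokA is now the most recent ref)
 *	lwkt_reltoken(&tokA);
 *	...
 *	lwkt_reltoken(&tokB);
 */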
958 
959 #ifdef DDB
960 DB_SHOW_COMMAND(tokens, db_tok_all)
961 {
962 	struct lwkt_token *tok, **ptr;
963 	struct lwkt_token *toklist[16] = {
964 		&mp_token,
965 		&pmap_token,
966 		&dev_token,
967 		&vm_token,
968 		&vmspace_token,
969 		&kvm_token,
970 		&sigio_token,
971 		&tty_token,
972 		&vnode_token,
973 		NULL
974 	};
975 
976 	ptr = toklist;
977 	for (tok = *ptr; tok; tok = *(++ptr)) {
978 		db_printf("tok=%p tr_owner=%p t_collisions=%ld t_desc=%s\n", tok,
979 		    (tok->t_ref ? tok->t_ref->tr_owner : NULL),
980 		    tok->t_collisions, tok->t_desc);
981 	}
982 }
983 #endif /* DDB */
984