1 /*
2 * Copyright (c) 2009 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34 /*
35 * Implement fast persistent locks based on atomic_cmpset_int() with
36 * semantics similar to lockmgr locks but faster and taking up much less
37 * space. Taken from HAMMER's lock implementation.
38 *
39 * These are meant to complement our LWKT tokens. Tokens are only held
40 * while the thread is running. Mutexes can be held across blocking
41 * conditions.
42 *
43 * - Exclusive priority over shared to prevent SMP starvation.
44 * - locks can be aborted (async callback, if any, will be made w/ENOLCK).
45 * - locks can be asynchronous.
46 * - synchronous fast path if no blocking occurs (async callback is not
47 * made in this case).
48 *
49 * Generally speaking any caller-supplied link state must be properly
50 * initialized before use.
51 *
52 * Most of the support is in sys/mutex[2].h. We mostly provide backoff
53 * functions here.
54 */
55
56 #include <sys/param.h>
57 #include <sys/systm.h>
58 #include <sys/kernel.h>
59 #include <sys/sysctl.h>
60 #include <sys/indefinite.h>
61 #include <sys/thread.h>
62
63 #include <machine/cpufunc.h>
64
65 #include <sys/thread2.h>
66 #include <sys/mutex2.h>
67 #include <sys/indefinite2.h>
68
69 static int mtx_chain_link_ex(mtx_t *mtx, u_int olock);
70 static int mtx_chain_link_sh(mtx_t *mtx, u_int olock);
71 static void mtx_delete_link(mtx_t *mtx, mtx_link_t *link);
72
73 /*
74 * Exclusive-lock a mutex, block until acquired unless link is async.
75 * Recursion is allowed.
76 *
77 * Returns 0 on success, the tsleep() return code on failure, EINPROGRESS
78 * if async. If immediately successful an async exclusive lock will return 0
79 * and not issue the async callback or link the link structure. The caller
80 * must handle this case (typically this is an optimal code path).
81 *
82 * A tsleep() error can only be returned if PCATCH is specified in the flags.
83 */
84 static __inline int
__mtx_lock_ex(mtx_t * mtx,mtx_link_t * link,int flags,int to)85 __mtx_lock_ex(mtx_t *mtx, mtx_link_t *link, int flags, int to)
86 {
87 thread_t td;
88 u_int lock;
89 u_int nlock;
90 int error;
91 int isasync;
92
93 for (;;) {
94 lock = mtx->mtx_lock;
95 cpu_ccfence();
96
97 if (lock == 0) {
98 nlock = MTX_EXCLUSIVE | 1;
99 if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
100 mtx->mtx_owner = curthread;
101 cpu_sfence();
102 link->state = MTX_LINK_ACQUIRED;
103 error = 0;
104 break;
105 }
106 continue;
107 }
108 if ((lock & MTX_EXCLUSIVE) && mtx->mtx_owner == curthread) {
109 KKASSERT((lock & MTX_MASK) != MTX_MASK);
110 nlock = lock + 1;
111 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
112 cpu_sfence();
113 link->state = MTX_LINK_ACQUIRED;
114 error = 0;
115 break;
116 }
117 continue;
118 }
119
120 /*
121 * We need MTX_LINKSPIN to manipulate exlink or
122 * shlink.
123 *
124 * We must set MTX_EXWANTED with MTX_LINKSPIN to indicate
125 * pending exclusive requests. It cannot be set as a separate
126 * operation prior to acquiring MTX_LINKSPIN.
127 *
128 * To avoid unnecessary cpu cache traffic we poll
129 * for collisions. It is also possible that EXWANTED
130 * state failing the above test was spurious, so all the
131 * tests must be repeated if we cannot obtain LINKSPIN
132 * with the prior state tests intact (i.e. don't reload
133 * the (lock) variable here, for heaven's sake!).
134 */
135 if (lock & MTX_LINKSPIN) {
136 cpu_pause();
137 continue;
138 }
139 td = curthread;
140 nlock = lock | MTX_EXWANTED | MTX_LINKSPIN;
141 crit_enter_quick(td);
142 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock) == 0) {
143 crit_exit_quick(td);
144 continue;
145 }
146
147 /*
148 * Check for early abort.
149 */
150 if (link->state == MTX_LINK_ABORTED) {
151 if (mtx->mtx_exlink == NULL) {
152 atomic_clear_int(&mtx->mtx_lock,
153 MTX_LINKSPIN |
154 MTX_EXWANTED);
155 } else {
156 atomic_clear_int(&mtx->mtx_lock,
157 MTX_LINKSPIN);
158 }
159 crit_exit_quick(td);
160 link->state = MTX_LINK_IDLE;
161 error = ENOLCK;
162 break;
163 }
164
165 /*
166 * Add our link to the exlink list and release LINKSPIN.
167 */
168 link->owner = td;
169 link->state = MTX_LINK_LINKED_EX;
170 if (mtx->mtx_exlink) {
171 link->next = mtx->mtx_exlink;
172 link->prev = link->next->prev;
173 link->next->prev = link;
174 link->prev->next = link;
175 } else {
176 link->next = link;
177 link->prev = link;
178 mtx->mtx_exlink = link;
179 }
180 isasync = (link->callback != NULL);
181 atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN);
182 crit_exit_quick(td);
183
184 /*
185 * If asynchronous lock request return without
186 * blocking, leave link structure linked.
187 */
188 if (isasync) {
189 error = EINPROGRESS;
190 break;
191 }
192
193 /*
194 * Wait for lock
195 */
196 error = mtx_wait_link(mtx, link, flags, to);
197 break;
198 }
199 return (error);
200 }
201
202 int
_mtx_lock_ex_link(mtx_t * mtx,mtx_link_t * link,int flags,int to)203 _mtx_lock_ex_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
204 {
205 return(__mtx_lock_ex(mtx, link, flags, to));
206 }
207
208 int
_mtx_lock_ex(mtx_t * mtx,int flags,int to)209 _mtx_lock_ex(mtx_t *mtx, int flags, int to)
210 {
211 mtx_link_t link;
212
213 mtx_link_init(&link);
214 return(__mtx_lock_ex(mtx, &link, flags, to));
215 }
216
217 int
_mtx_lock_ex_quick(mtx_t * mtx)218 _mtx_lock_ex_quick(mtx_t *mtx)
219 {
220 mtx_link_t link;
221
222 mtx_link_init(&link);
223 return(__mtx_lock_ex(mtx, &link, 0, 0));
224 }
225
226 /*
227 * Share-lock a mutex, block until acquired. Recursion is allowed.
228 *
229 * Returns 0 on success, or the tsleep() return code on failure.
230 * An error can only be returned if PCATCH is specified in the flags.
231 *
232 * NOTE: Shared locks get a mass-wakeup so if the tsleep fails we
233 * do not have to chain the wakeup().
234 */
235 static __inline int
__mtx_lock_sh(mtx_t * mtx,mtx_link_t * link,int flags,int to)236 __mtx_lock_sh(mtx_t *mtx, mtx_link_t *link, int flags, int to)
237 {
238 thread_t td;
239 u_int lock;
240 u_int nlock;
241 int error;
242 int isasync;
243
244 for (;;) {
245 lock = mtx->mtx_lock;
246 cpu_ccfence();
247
248 if (lock == 0) {
249 nlock = 1;
250 if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
251 error = 0;
252 cpu_sfence();
253 link->state = MTX_LINK_ACQUIRED;
254 break;
255 }
256 continue;
257 }
258 if ((lock & (MTX_EXCLUSIVE | MTX_EXWANTED)) == 0) {
259 KKASSERT((lock & MTX_MASK) != MTX_MASK);
260 nlock = lock + 1;
261 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
262 error = 0;
263 cpu_sfence();
264 link->state = MTX_LINK_ACQUIRED;
265 break;
266 }
267 continue;
268 }
269
270 /*
271 * We need MTX_LINKSPIN to manipulate exlink or
272 * shlink.
273 *
274 * We must set MTX_SHWANTED with MTX_LINKSPIN to indicate
275 * pending shared requests. It cannot be set as a separate
276 * operation prior to acquiring MTX_LINKSPIN.
277 *
278 * To avoid unnecessary cpu cache traffic we poll
279 * for collisions. It is also possible that EXWANTED
280 * state failing the above test was spurious, so all the
281 * tests must be repeated if we cannot obtain LINKSPIN
282 * with the prior state tests intact (i.e. don't reload
283 * the (lock) variable here, for heaven's sake!).
284 */
285 if (lock & MTX_LINKSPIN) {
286 cpu_pause();
287 continue;
288 }
289 td = curthread;
290 nlock = lock | MTX_SHWANTED | MTX_LINKSPIN;
291 crit_enter_quick(td);
292 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock) == 0) {
293 crit_exit_quick(td);
294 continue;
295 }
296
297 /*
298 * Check for early abort. Other shared lock requestors
299 * could have sneaked in before we set LINKSPIN so make
300 * sure we undo the state properly.
301 */
302 if (link->state == MTX_LINK_ABORTED) {
303 if (mtx->mtx_shlink) {
304 atomic_clear_int(&mtx->mtx_lock,
305 MTX_LINKSPIN);
306 } else {
307 atomic_clear_int(&mtx->mtx_lock,
308 MTX_LINKSPIN |
309 MTX_SHWANTED);
310 }
311 crit_exit_quick(td);
312 link->state = MTX_LINK_IDLE;
313 error = ENOLCK;
314 break;
315 }
316
317 /*
318 * Add our link to the shlink list and release LINKSPIN.
319 */
320 link->owner = td;
321 link->state = MTX_LINK_LINKED_SH;
322 if (mtx->mtx_shlink) {
323 link->next = mtx->mtx_shlink;
324 link->prev = link->next->prev;
325 link->next->prev = link;
326 link->prev->next = link;
327 } else {
328 link->next = link;
329 link->prev = link;
330 mtx->mtx_shlink = link;
331 }
332 isasync = (link->callback != NULL);
333 atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN);
334 crit_exit_quick(td);
335
336 /*
337 * If asynchronous lock request return without
338 * blocking, leave link structure linked.
339 */
340 if (isasync) {
341 error = EINPROGRESS;
342 break;
343 }
344
345 /*
346 * Wait for lock
347 */
348 error = mtx_wait_link(mtx, link, flags, to);
349 break;
350 }
351 return (error);
352 }
353
354 int
_mtx_lock_sh_link(mtx_t * mtx,mtx_link_t * link,int flags,int to)355 _mtx_lock_sh_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
356 {
357 return(__mtx_lock_sh(mtx, link, flags, to));
358 }
359
360 int
_mtx_lock_sh(mtx_t * mtx,int flags,int to)361 _mtx_lock_sh(mtx_t *mtx, int flags, int to)
362 {
363 mtx_link_t link;
364
365 mtx_link_init(&link);
366 return(__mtx_lock_sh(mtx, &link, flags, to));
367 }
368
369 int
_mtx_lock_sh_quick(mtx_t * mtx)370 _mtx_lock_sh_quick(mtx_t *mtx)
371 {
372 mtx_link_t link;
373
374 mtx_link_init(&link);
375 return(__mtx_lock_sh(mtx, &link, 0, 0));
376 }
377
378 /*
379 * Get an exclusive spinlock the hard way.
380 */
381 void
_mtx_spinlock(mtx_t * mtx)382 _mtx_spinlock(mtx_t *mtx)
383 {
384 u_int lock;
385 u_int nlock;
386 int bb = 1;
387 int bo;
388
389 for (;;) {
390 lock = mtx->mtx_lock;
391 if (lock == 0) {
392 nlock = MTX_EXCLUSIVE | 1;
393 if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
394 mtx->mtx_owner = curthread;
395 break;
396 }
397 } else if ((lock & MTX_EXCLUSIVE) &&
398 mtx->mtx_owner == curthread) {
399 KKASSERT((lock & MTX_MASK) != MTX_MASK);
400 nlock = lock + 1;
401 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
402 break;
403 } else {
404 /* MWAIT here */
405 if (bb < 1000)
406 ++bb;
407 cpu_pause();
408 for (bo = 0; bo < bb; ++bo)
409 ;
410 }
411 cpu_pause();
412 }
413 }
414
415 /*
416 * Attempt to acquire a spinlock, if we fail we must undo the
417 * gd->gd_spinlocks/gd->gd_curthead->td_critcount predisposition.
418 *
419 * Returns 0 on success, EAGAIN on failure.
420 */
421 int
_mtx_spinlock_try(mtx_t * mtx)422 _mtx_spinlock_try(mtx_t *mtx)
423 {
424 globaldata_t gd = mycpu;
425 u_int lock;
426 u_int nlock;
427 int res = 0;
428
429 for (;;) {
430 lock = mtx->mtx_lock;
431 if (lock == 0) {
432 nlock = MTX_EXCLUSIVE | 1;
433 if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
434 mtx->mtx_owner = gd->gd_curthread;
435 break;
436 }
437 } else if ((lock & MTX_EXCLUSIVE) &&
438 mtx->mtx_owner == gd->gd_curthread) {
439 KKASSERT((lock & MTX_MASK) != MTX_MASK);
440 nlock = lock + 1;
441 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
442 break;
443 } else {
444 --gd->gd_spinlocks;
445 cpu_ccfence();
446 crit_exit_quick(gd->gd_curthread);
447 res = EAGAIN;
448 break;
449 }
450 cpu_pause();
451 }
452 return res;
453 }
454
455 #if 0
456
457 void
458 _mtx_spinlock_sh(mtx_t *mtx)
459 {
460 u_int lock;
461 u_int nlock;
462 int bb = 1;
463 int bo;
464
465 for (;;) {
466 lock = mtx->mtx_lock;
467 if ((lock & MTX_EXCLUSIVE) == 0) {
468 KKASSERT((lock & MTX_MASK) != MTX_MASK);
469 nlock = lock + 1;
470 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
471 break;
472 } else {
473 /* MWAIT here */
474 if (bb < 1000)
475 ++bb;
476 cpu_pause();
477 for (bo = 0; bo < bb; ++bo)
478 ;
479 }
480 cpu_pause();
481 }
482 }
483
484 #endif
485
486 int
_mtx_lock_ex_try(mtx_t * mtx)487 _mtx_lock_ex_try(mtx_t *mtx)
488 {
489 u_int lock;
490 u_int nlock;
491 int error;
492
493 for (;;) {
494 lock = mtx->mtx_lock;
495 if (lock == 0) {
496 nlock = MTX_EXCLUSIVE | 1;
497 if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
498 mtx->mtx_owner = curthread;
499 error = 0;
500 break;
501 }
502 } else if ((lock & MTX_EXCLUSIVE) &&
503 mtx->mtx_owner == curthread) {
504 KKASSERT((lock & MTX_MASK) != MTX_MASK);
505 nlock = lock + 1;
506 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
507 error = 0;
508 break;
509 }
510 } else {
511 error = EAGAIN;
512 break;
513 }
514 cpu_pause();
515 }
516 return (error);
517 }
518
519 int
_mtx_lock_sh_try(mtx_t * mtx)520 _mtx_lock_sh_try(mtx_t *mtx)
521 {
522 u_int lock;
523 u_int nlock;
524 int error = 0;
525
526 for (;;) {
527 lock = mtx->mtx_lock;
528 if ((lock & MTX_EXCLUSIVE) == 0) {
529 KKASSERT((lock & MTX_MASK) != MTX_MASK);
530 nlock = lock + 1;
531 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
532 break;
533 } else {
534 error = EAGAIN;
535 break;
536 }
537 cpu_pause();
538 }
539 return (error);
540 }
541
542 /*
543 * If the lock is held exclusively it must be owned by the caller. If the
544 * lock is already a shared lock this operation is a NOP. A panic will
545 * occur if the lock is not held either shared or exclusive.
546 *
547 * The exclusive count is converted to a shared count.
548 */
549 void
_mtx_downgrade(mtx_t * mtx)550 _mtx_downgrade(mtx_t *mtx)
551 {
552 u_int lock;
553 u_int nlock;
554
555 for (;;) {
556 lock = mtx->mtx_lock;
557 cpu_ccfence();
558
559 /*
560 * NOP if already shared.
561 */
562 if ((lock & MTX_EXCLUSIVE) == 0) {
563 KKASSERT((lock & MTX_MASK) > 0);
564 break;
565 }
566
567 /*
568 * Transfer count to shared. Any additional pending shared
569 * waiters must be woken up.
570 */
571 if (lock & MTX_SHWANTED) {
572 if (mtx_chain_link_sh(mtx, lock))
573 break;
574 /* retry */
575 } else {
576 nlock = lock & ~MTX_EXCLUSIVE;
577 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
578 break;
579 /* retry */
580 }
581 cpu_pause();
582 }
583 }
584
585 /*
586 * Upgrade a shared lock to an exclusive lock. The upgrade will fail if
587 * the shared lock has a count other then 1. Optimize the most likely case
588 * but note that a single cmpset can fail due to WANTED races.
589 *
590 * If the lock is held exclusively it must be owned by the caller and
591 * this function will simply return without doing anything. A panic will
592 * occur if the lock is held exclusively by someone other then the caller.
593 *
594 * Returns 0 on success, EDEADLK on failure.
595 */
596 int
_mtx_upgrade_try(mtx_t * mtx)597 _mtx_upgrade_try(mtx_t *mtx)
598 {
599 u_int lock;
600 u_int nlock;
601 int error = 0;
602
603 for (;;) {
604 lock = mtx->mtx_lock;
605 cpu_ccfence();
606
607 if ((lock & ~MTX_EXWANTED) == 1) {
608 nlock = lock | MTX_EXCLUSIVE;
609 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
610 mtx->mtx_owner = curthread;
611 break;
612 }
613 } else if (lock & MTX_EXCLUSIVE) {
614 KKASSERT(mtx->mtx_owner == curthread);
615 break;
616 } else {
617 error = EDEADLK;
618 break;
619 }
620 cpu_pause();
621 }
622 return (error);
623 }
624
625 /*
626 * Unlock a lock. The caller must hold the lock either shared or exclusive.
627 *
628 * On the last release we handle any pending chains.
629 */
630 void
_mtx_unlock(mtx_t * mtx)631 _mtx_unlock(mtx_t *mtx)
632 {
633 thread_t td __debugvar = curthread;
634 u_int lock;
635 u_int nlock;
636
637 for (;;) {
638 lock = mtx->mtx_lock;
639 cpu_ccfence();
640
641 switch(lock) {
642 case MTX_EXCLUSIVE | 1:
643 /*
644 * Last release, exclusive lock.
645 * No exclusive or shared requests pending.
646 */
647 KKASSERT(mtx->mtx_owner == td ||
648 mtx->mtx_owner == NULL);
649 mtx->mtx_owner = NULL;
650 nlock = 0;
651 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
652 goto done;
653 break;
654 case MTX_EXCLUSIVE | MTX_EXWANTED | 1:
655 case MTX_EXCLUSIVE | MTX_EXWANTED | MTX_SHWANTED | 1:
656 /*
657 * Last release, exclusive lock.
658 * Exclusive requests pending.
659 * Exclusive requests have priority over shared reqs.
660 */
661 KKASSERT(mtx->mtx_owner == td ||
662 mtx->mtx_owner == NULL);
663 mtx->mtx_owner = NULL;
664 if (mtx_chain_link_ex(mtx, lock))
665 goto done;
666 break;
667 case MTX_EXCLUSIVE | MTX_SHWANTED | 1:
668 /*
669 * Last release, exclusive lock.
670 *
671 * Shared requests are pending. Transfer our count (1)
672 * to the first shared request, wakeup all shared reqs.
673 */
674 KKASSERT(mtx->mtx_owner == td ||
675 mtx->mtx_owner == NULL);
676 mtx->mtx_owner = NULL;
677 if (mtx_chain_link_sh(mtx, lock))
678 goto done;
679 break;
680 case 1:
681 /*
682 * Last release, shared lock.
683 * No exclusive or shared requests pending.
684 */
685 nlock = 0;
686 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
687 goto done;
688 break;
689 case MTX_EXWANTED | 1:
690 case MTX_EXWANTED | MTX_SHWANTED | 1:
691 /*
692 * Last release, shared lock.
693 *
694 * Exclusive requests are pending. Upgrade this
695 * final shared lock to exclusive and transfer our
696 * count (1) to the next exclusive request.
697 *
698 * Exclusive requests have priority over shared reqs.
699 */
700 if (mtx_chain_link_ex(mtx, lock))
701 goto done;
702 break;
703 case MTX_SHWANTED | 1:
704 /*
705 * Last release, shared lock.
706 * Shared requests pending.
707 */
708 if (mtx_chain_link_sh(mtx, lock))
709 goto done;
710 break;
711 default:
712 /*
713 * We have to loop if this is the last release but
714 * someone is fiddling with LINKSPIN.
715 */
716 if ((lock & MTX_MASK) == 1) {
717 KKASSERT(lock & MTX_LINKSPIN);
718 break;
719 }
720
721 /*
722 * Not the last release (shared or exclusive)
723 */
724 nlock = lock - 1;
725 KKASSERT((nlock & MTX_MASK) != MTX_MASK);
726 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
727 goto done;
728 break;
729 }
730 /* loop try again */
731 cpu_pause();
732 }
733 done:
734 ;
735 }
736
737 /*
738 * Chain pending links. Called on the last release of an exclusive or
739 * shared lock when the appropriate WANTED bit is set. mtx_lock old state
740 * is passed in with the count left at 1, which we can inherit, and other
741 * bits which we must adjust in a single atomic operation.
742 *
743 * Return non-zero on success, 0 if caller needs to retry.
744 *
745 * NOTE: It's ok if MTX_EXWANTED is in an indeterminant state while we are
746 * acquiring LINKSPIN as all other cases will also need to acquire
747 * LINKSPIN when handling the EXWANTED case.
748 */
749 static int
mtx_chain_link_ex(mtx_t * mtx,u_int olock)750 mtx_chain_link_ex(mtx_t *mtx, u_int olock)
751 {
752 thread_t td = curthread;
753 mtx_link_t *link;
754 u_int nlock;
755
756 olock &= ~MTX_LINKSPIN;
757 nlock = olock | MTX_LINKSPIN | MTX_EXCLUSIVE; /* upgrade if necc */
758 crit_enter_quick(td);
759 if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) {
760 link = mtx->mtx_exlink;
761 KKASSERT(link != NULL);
762 if (link->next == link) {
763 mtx->mtx_exlink = NULL;
764 nlock = MTX_LINKSPIN | MTX_EXWANTED; /* to clear */
765 } else {
766 mtx->mtx_exlink = link->next;
767 link->next->prev = link->prev;
768 link->prev->next = link->next;
769 nlock = MTX_LINKSPIN; /* to clear */
770 }
771 KKASSERT(link->state == MTX_LINK_LINKED_EX);
772 mtx->mtx_owner = link->owner;
773 cpu_sfence();
774
775 /*
776 * WARNING! The callback can only be safely
777 * made with LINKSPIN still held
778 * and in a critical section.
779 *
780 * WARNING! The link can go away after the
781 * state is set, or after the
782 * callback.
783 */
784 if (link->callback) {
785 link->state = MTX_LINK_CALLEDBACK;
786 link->callback(link, link->arg, 0);
787 } else {
788 link->state = MTX_LINK_ACQUIRED;
789 wakeup(link);
790 }
791 atomic_clear_int(&mtx->mtx_lock, nlock);
792 crit_exit_quick(td);
793 return 1;
794 }
795 /* retry */
796 crit_exit_quick(td);
797
798 return 0;
799 }
800
801 /*
802 * Flush waiting shared locks. The lock's prior state is passed in and must
803 * be adjusted atomically only if it matches and LINKSPIN is not set.
804 *
805 * IMPORTANT! The caller has left one active count on the lock for us to
806 * consume. We will apply this to the first link, but must add
807 * additional counts for any other links.
808 */
809 static int
mtx_chain_link_sh(mtx_t * mtx,u_int olock)810 mtx_chain_link_sh(mtx_t *mtx, u_int olock)
811 {
812 thread_t td = curthread;
813 mtx_link_t *link;
814 u_int addcount;
815 u_int nlock;
816
817 olock &= ~MTX_LINKSPIN;
818 nlock = olock | MTX_LINKSPIN;
819 nlock &= ~MTX_EXCLUSIVE;
820 crit_enter_quick(td);
821 if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) {
822 /*
823 * It should not be possible for SHWANTED to be set without
824 * any links pending.
825 */
826 KKASSERT(mtx->mtx_shlink != NULL);
827
828 /*
829 * We have to process the count for all shared locks before
830 * we process any of the links. Count the additional shared
831 * locks beyond the first link (which is already accounted
832 * for) and associate the full count with the lock
833 * immediately.
834 */
835 addcount = 0;
836 for (link = mtx->mtx_shlink->next; link != mtx->mtx_shlink;
837 link = link->next) {
838 ++addcount;
839 }
840 if (addcount > 0)
841 atomic_add_int(&mtx->mtx_lock, addcount);
842
843 /*
844 * We can wakeup all waiting shared locks.
845 */
846 while ((link = mtx->mtx_shlink) != NULL) {
847 KKASSERT(link->state == MTX_LINK_LINKED_SH);
848 if (link->next == link) {
849 mtx->mtx_shlink = NULL;
850 } else {
851 mtx->mtx_shlink = link->next;
852 link->next->prev = link->prev;
853 link->prev->next = link->next;
854 }
855 link->next = NULL;
856 link->prev = NULL;
857 cpu_sfence();
858 if (link->callback) {
859 link->state = MTX_LINK_CALLEDBACK;
860 link->callback(link, link->arg, 0);
861 } else {
862 cpu_sfence();
863 link->state = MTX_LINK_ACQUIRED;
864 wakeup(link);
865 }
866 }
867 atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN |
868 MTX_SHWANTED);
869 crit_exit_quick(td);
870 return 1;
871 }
872 /* retry */
873 crit_exit_quick(td);
874
875 return 0;
876 }
877
878 /*
879 * Delete a link structure after tsleep has failed. This code is not
880 * in the critical path as most exclusive waits are chained.
881 */
882 static
883 void
mtx_delete_link(mtx_t * mtx,mtx_link_t * link)884 mtx_delete_link(mtx_t *mtx, mtx_link_t *link)
885 {
886 thread_t td = curthread;
887 u_int lock;
888 u_int nlock;
889
890 /*
891 * Acquire MTX_LINKSPIN.
892 *
893 * Do not use cmpxchg to wait for LINKSPIN to clear as this might
894 * result in too much cpu cache traffic.
895 */
896 crit_enter_quick(td);
897 for (;;) {
898 lock = mtx->mtx_lock;
899 if (lock & MTX_LINKSPIN) {
900 cpu_pause();
901 continue;
902 }
903 nlock = lock | MTX_LINKSPIN;
904 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
905 break;
906 cpu_pause();
907 }
908
909 /*
910 * Delete the link and release LINKSPIN.
911 */
912 nlock = MTX_LINKSPIN; /* to clear */
913
914 switch(link->state) {
915 case MTX_LINK_LINKED_EX:
916 if (link->next == link) {
917 mtx->mtx_exlink = NULL;
918 nlock |= MTX_EXWANTED; /* to clear */
919 } else {
920 mtx->mtx_exlink = link->next;
921 link->next->prev = link->prev;
922 link->prev->next = link->next;
923 }
924 break;
925 case MTX_LINK_LINKED_SH:
926 if (link->next == link) {
927 mtx->mtx_shlink = NULL;
928 nlock |= MTX_SHWANTED; /* to clear */
929 } else {
930 mtx->mtx_shlink = link->next;
931 link->next->prev = link->prev;
932 link->prev->next = link->next;
933 }
934 break;
935 default:
936 /* no change */
937 break;
938 }
939 atomic_clear_int(&mtx->mtx_lock, nlock);
940 crit_exit_quick(td);
941 }
942
943 /*
944 * Wait for async lock completion or abort. Returns ENOLCK if an abort
945 * occurred.
946 */
947 int
mtx_wait_link(mtx_t * mtx,mtx_link_t * link,int flags,int to)948 mtx_wait_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
949 {
950 indefinite_info_t info;
951 int error;
952
953 indefinite_init(&info, mtx, mtx->mtx_ident, 1,
954 ((link->state & MTX_LINK_LINKED_SH) ? 'm' : 'M'));
955
956 /*
957 * Sleep. Handle false wakeups, interruptions, etc.
958 * The link may also have been aborted. The LINKED
959 * bit was set by this cpu so we can test it without
960 * fences.
961 */
962 error = 0;
963 while (link->state & MTX_LINK_LINKED) {
964 tsleep_interlock(link, 0);
965 cpu_lfence();
966 if (link->state & MTX_LINK_LINKED) {
967 error = tsleep(link, flags | PINTERLOCKED,
968 mtx->mtx_ident, to);
969 if (error)
970 break;
971 }
972 if ((mtx->mtx_flags & MTXF_NOCOLLSTATS) == 0)
973 indefinite_check(&info);
974 }
975
976 /*
977 * We need at least a lfence (load fence) to ensure our cpu does not
978 * reorder loads (of data outside the lock structure) prior to the
979 * remote cpu's release, since the above test may have run without
980 * any atomic interactions.
981 *
982 * If we do not do this then state updated by the other cpu before
983 * releasing its lock may not be read cleanly by our cpu when this
984 * function returns. Even though the other cpu ordered its stores,
985 * our loads can still be out of order.
986 */
987 cpu_mfence();
988
989 /*
990 * We are done, make sure the link structure is unlinked.
991 * It may still be on the list due to e.g. EINTR or
992 * EWOULDBLOCK.
993 *
994 * It is possible for the tsleep to race an ABORT and cause
995 * error to be 0.
996 *
997 * The tsleep() can be woken up for numerous reasons and error
998 * might be zero in situations where we intend to return an error.
999 *
1000 * (This is the synchronous case so state cannot be CALLEDBACK)
1001 */
1002 switch(link->state) {
1003 case MTX_LINK_ACQUIRED:
1004 case MTX_LINK_CALLEDBACK:
1005 error = 0;
1006 break;
1007 case MTX_LINK_ABORTED:
1008 error = ENOLCK;
1009 break;
1010 case MTX_LINK_LINKED_EX:
1011 case MTX_LINK_LINKED_SH:
1012 mtx_delete_link(mtx, link);
1013 /* fall through */
1014 default:
1015 if (error == 0)
1016 error = EWOULDBLOCK;
1017 break;
1018 }
1019
1020 /*
1021 * Clear state on status returned.
1022 */
1023 link->state = MTX_LINK_IDLE;
1024
1025 if ((mtx->mtx_flags & MTXF_NOCOLLSTATS) == 0)
1026 indefinite_done(&info);
1027
1028 return error;
1029 }
1030
1031 /*
1032 * Abort a mutex locking operation, causing mtx_lock_ex_link() to
1033 * return ENOLCK. This may be called at any time after the mtx_link
1034 * is initialized or the status from a previous lock has been
1035 * returned. If called prior to the next (non-try) lock attempt, the
1036 * next lock attempt using this link structure will abort instantly.
1037 *
1038 * Caller must still wait for the operation to complete, either from a
1039 * blocking call that is still in progress or by calling mtx_wait_link().
1040 *
1041 * If an asynchronous lock request is possibly in-progress, the caller
1042 * should call mtx_wait_link() synchronously. Note that the asynchronous
1043 * lock callback will NOT be called if a successful abort occurred. XXX
1044 */
1045 void
mtx_abort_link(mtx_t * mtx,mtx_link_t * link)1046 mtx_abort_link(mtx_t *mtx, mtx_link_t *link)
1047 {
1048 thread_t td = curthread;
1049 u_int lock;
1050 u_int nlock;
1051
1052 /*
1053 * Acquire MTX_LINKSPIN
1054 */
1055 crit_enter_quick(td);
1056 for (;;) {
1057 lock = mtx->mtx_lock;
1058 if (lock & MTX_LINKSPIN) {
1059 cpu_pause();
1060 continue;
1061 }
1062 nlock = lock | MTX_LINKSPIN;
1063 if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
1064 break;
1065 cpu_pause();
1066 }
1067
1068 /*
1069 * Do the abort.
1070 *
1071 * WARNING! Link structure can disappear once link->state is set.
1072 */
1073 nlock = MTX_LINKSPIN; /* to clear */
1074
1075 switch(link->state) {
1076 case MTX_LINK_IDLE:
1077 /*
1078 * Link not started yet
1079 */
1080 link->state = MTX_LINK_ABORTED;
1081 break;
1082 case MTX_LINK_LINKED_EX:
1083 /*
1084 * de-link, mark aborted, and potentially wakeup the thread
1085 * or issue the callback.
1086 */
1087 if (link->next == link) {
1088 if (mtx->mtx_exlink == link) {
1089 mtx->mtx_exlink = NULL;
1090 nlock |= MTX_EXWANTED; /* to clear */
1091 }
1092 } else {
1093 if (mtx->mtx_exlink == link)
1094 mtx->mtx_exlink = link->next;
1095 link->next->prev = link->prev;
1096 link->prev->next = link->next;
1097 }
1098
1099 /*
1100 * When aborting the async callback is still made. We must
1101 * not set the link status to ABORTED in the callback case
1102 * since there is nothing else to clear its status if the
1103 * link is reused.
1104 */
1105 if (link->callback) {
1106 link->state = MTX_LINK_CALLEDBACK;
1107 link->callback(link, link->arg, ENOLCK);
1108 } else {
1109 link->state = MTX_LINK_ABORTED;
1110 wakeup(link);
1111 }
1112 break;
1113 case MTX_LINK_LINKED_SH:
1114 /*
1115 * de-link, mark aborted, and potentially wakeup the thread
1116 * or issue the callback.
1117 */
1118 if (link->next == link) {
1119 if (mtx->mtx_shlink == link) {
1120 mtx->mtx_shlink = NULL;
1121 nlock |= MTX_SHWANTED; /* to clear */
1122 }
1123 } else {
1124 if (mtx->mtx_shlink == link)
1125 mtx->mtx_shlink = link->next;
1126 link->next->prev = link->prev;
1127 link->prev->next = link->next;
1128 }
1129
1130 /*
1131 * When aborting the async callback is still made. We must
1132 * not set the link status to ABORTED in the callback case
1133 * since there is nothing else to clear its status if the
1134 * link is reused.
1135 */
1136 if (link->callback) {
1137 link->state = MTX_LINK_CALLEDBACK;
1138 link->callback(link, link->arg, ENOLCK);
1139 } else {
1140 link->state = MTX_LINK_ABORTED;
1141 wakeup(link);
1142 }
1143 break;
1144 case MTX_LINK_ACQUIRED:
1145 case MTX_LINK_CALLEDBACK:
1146 /*
1147 * Too late, the lock was acquired. Let it complete.
1148 */
1149 break;
1150 default:
1151 /*
1152 * link already aborted, do nothing.
1153 */
1154 break;
1155 }
1156 atomic_clear_int(&mtx->mtx_lock, nlock);
1157 crit_exit_quick(td);
1158 }
1159