/*
 * Copyright (c) 2007-2011 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * HAMMER structural locking
 */

#include "hammer.h"

void
hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;

	KKASSERT(lock->refs);
	for (;;) {
		lv = lock->lockval;

		if (lv == 0) {
			nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				lock->lowner = td;
				break;
			}
		} else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
			   lock->lowner == td) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				break;
		} else {
			if (hammer_debug_locks) {
				hdkprintf("held by %p\n", lock->lowner);
			}
			nlv = lv | HAMMER_LOCKF_WANTED;
			++hammer_contention_count;
			tsleep_interlock(&lock->lockval, 0);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
				if (hammer_debug_locks)
					hdkprintf("try again\n");
			}
		}
	}
}
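
/*
 * Editor's note -- usage sketch (not part of the original source): a
 * structural reference is normally held before the lock is taken, and the
 * exclusive lock is paired with hammer_unlock().  hammer_lock_ex(), used
 * by hammer_sync_lock_ex() later in this file, is assumed to be the
 * hammer.h convenience wrapper that supplies a default ident string.
 *
 *	hammer_ref(&node->lock);
 *	hammer_lock_ex(&node->lock);
 *	... modify the structure ...
 *	hammer_unlock(&node->lock);
 *	hammer_rel(&node->lock);
 */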

/*
 * Try to obtain an exclusive lock
 */
int
hammer_lock_ex_try(struct hammer_lock *lock)
{
	thread_t td = curthread;
	int error;
	u_int lv;
	u_int nlv;

	KKASSERT(lock->refs);
	for (;;) {
		lv = lock->lockval;

		if (lv == 0) {
			nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				lock->lowner = td;
				error = 0;
				break;
			}
		} else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
			   lock->lowner == td) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				error = 0;
				break;
			}
		} else {
			error = EAGAIN;
			break;
		}
	}
	return (error);
}

/*
 * Obtain a shared lock
 *
 * We do not give pending exclusive locks priority over shared locks as
 * doing so could lead to a deadlock.
 */
void
hammer_lock_sh(struct hammer_lock *lock)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;
	const char *ident = "hmrlck";

	KKASSERT(lock->refs);
	for (;;) {
		lv = lock->lockval;

		if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				break;
		} else if (lock->lowner == td) {
			/*
			 * Disallowed case, drop into the kernel debugger
			 * for now.  Continuing from the debugger proceeds
			 * with the exclusive lock still held.
			 */
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				if (hammer_debug_critical)
					Debugger("hammer_lock_sh: holding ex");
				break;
			}
		} else {
			nlv = lv | HAMMER_LOCKF_WANTED;
			++hammer_contention_count;
			tsleep_interlock(&lock->lockval, 0);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
		}
	}
}

int
hammer_lock_sh_try(struct hammer_lock *lock)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;
	int error;

	KKASSERT(lock->refs);
	for (;;) {
		lv = lock->lockval;

		if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				error = 0;
				break;
			}
		} else if (lock->lowner == td) {
			/*
			 * Disallowed case, drop into the kernel debugger
			 * for now.  Continuing from the debugger proceeds
			 * with the exclusive lock still held.
			 */
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				if (hammer_debug_critical)
					Debugger("hammer_lock_sh: holding ex");
				error = 0;
				break;
			}
		} else {
			error = EAGAIN;
			break;
		}
	}
	return (error);
}

/*
 * Upgrade a shared lock to an exclusively held lock.  This function will
 * return EDEADLK if there is more than one shared holder.
 *
 * No error occurs and no action is taken if the lock is already exclusively
 * held by the caller.  If the lock is not held at all or held exclusively
 * by someone else, this function will panic.
 */
int
hammer_lock_upgrade(struct hammer_lock *lock, int shcount)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;
	int error;

	for (;;) {
		lv = lock->lockval;

		if ((lv & ~HAMMER_LOCKF_WANTED) == shcount) {
			nlv = lv | HAMMER_LOCKF_EXCLUSIVE;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				lock->lowner = td;
				error = 0;
				break;
			}
		} else if (lv & HAMMER_LOCKF_EXCLUSIVE) {
			if (lock->lowner != curthread)
				hpanic("illegal state");
			error = 0;
			break;
		} else if ((lv & ~HAMMER_LOCKF_WANTED) == 0) {
			hpanic("lock is not held");
			/* NOT REACHED */
			error = EDEADLK;
			break;
		} else {
			error = EDEADLK;
			break;
		}
	}
	return (error);
}

/*
 * Downgrade an exclusively held lock to a shared lock.
 */
void
hammer_lock_downgrade(struct hammer_lock *lock, int shcount)
{
	thread_t td __debugvar = curthread;
	u_int lv;
	u_int nlv;

	KKASSERT((lock->lockval & ~HAMMER_LOCKF_WANTED) ==
		 (HAMMER_LOCKF_EXCLUSIVE | shcount));
	KKASSERT(lock->lowner == td);

	/*
	 * NOTE: Must clear owner before releasing exclusivity
	 */
	lock->lowner = NULL;

	for (;;) {
		lv = lock->lockval;
		nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
		if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
			if (lv & HAMMER_LOCKF_WANTED)
				wakeup(&lock->lockval);
			break;
		}
	}
}
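
/*
 * Editor's note -- usage sketch (not part of the original source): a
 * caller holding a shared lock can attempt a temporary upgrade and later
 * restore the shared state, passing the same shared-holder count both
 * ways.  The shcount value of 1 below is only an illustration.
 *
 *	if (hammer_lock_upgrade(&node->lock, 1) == 0) {
 *		... modify while holding the lock exclusively ...
 *		hammer_lock_downgrade(&node->lock, 1);
 *	} else {
 *		... EDEADLK: other shared holders exist, back off ...
 *	}
 */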

void
hammer_unlock(struct hammer_lock *lock)
{
	thread_t td __debugvar = curthread;
	u_int lv;
	u_int nlv;

	lv = lock->lockval;
	KKASSERT(lv != 0);
	if (lv & HAMMER_LOCKF_EXCLUSIVE)
		KKASSERT(lock->lowner == td);

	for (;;) {
		lv = lock->lockval;
		nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
		if (nlv > 1) {
			nlv = lv - 1;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				break;
		} else if (nlv == 1) {
			nlv = 0;
			if (lv & HAMMER_LOCKF_EXCLUSIVE)
				lock->lowner = NULL;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				if (lv & HAMMER_LOCKF_WANTED)
					wakeup(&lock->lockval);
				break;
			}
		} else {
			hpanic("lock %p is not held", lock);
		}
	}
}

/*
 * The calling thread must be holding a shared or exclusive lock.
 * Returns < 0 if the lock is held shared, and > 0 if it is held exclusively.
 */
int
hammer_lock_status(struct hammer_lock *lock)
{
	u_int lv = lock->lockval;

	if (lv & HAMMER_LOCKF_EXCLUSIVE)
		return(1);
	else if (lv)
		return(-1);
	hpanic("lock must be held: %p", lock);
}

/*
 * Bump the ref count for a lock (not the excl/share count, but a separate
 * structural reference count).  The CHECK flag will be set on a 0->1
 * transition.
 *
 * This function does nothing to serialize races between multiple threads.
 * The caller can interlock it later on to deal with serialization.
 *
 * MPSAFE
 */
void
hammer_ref(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		if ((lv & ~HAMMER_REFS_FLAGS) == 0) {
			nlv = (lv + 1) | HAMMER_REFS_CHECK;
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return;
		} else {
			nlv = (lv + 1);
			KKASSERT((int)nlv > 0);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return;
		}
	}
	/* not reached */
}

/*
 * Drop the ref count for a lock (not the excl/share count, but a separate
 * structural reference count).  The CHECK flag will be cleared on a 1->0
 * transition.
 *
 * This function does nothing to serialize races between multiple threads.
 *
 * MPSAFE
 */
void
hammer_rel(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
			nlv = (lv - 1) & ~HAMMER_REFS_CHECK;
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return;
		} else {
			KKASSERT((int)lv > 0);
			nlv = (lv - 1);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return;
		}
	}
	/* not reached */
}

/*
 * The hammer_*_interlock() and hammer_*_interlock_done() functions are
 * more sophisticated versions which handle MP transition races and block
 * when necessary.
 *
 * hammer_ref_interlock() bumps the ref-count and conditionally acquires
 * the interlock for 0->1 transitions or if the CHECK bit is found to be set.
 *
 * This case will return 1, the interlock will be held, and the CHECK
 * bit also set.  Other threads attempting to ref will see the CHECK bit
 * and block until we clean up.
 *
 * 0 is returned for transitions other than 0->1 when the CHECK bit
 * is not found to be set, or if the function loses the race with another
 * thread.
 *
 * 1 is only returned to one thread and the others will block.
 * Effectively a 1 indicator means 'someone transitioned 0->1
 * and you are the first thread to successfully lock it after that, so you
 * need to check'.  Due to races the ref-count may be greater than 1 upon
 * return.
 *
 * MPSAFE
 */
int
hammer_ref_interlock(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	/*
	 * Integrated reference count bump, lock, and check, with hot-path.
	 *
	 * (a) Return 1	(+LOCKED, +CHECK)	0->1 transition
	 * (b) Return 0	(-LOCKED, -CHECK)	N->N+1 transition
	 * (c) Break out (+CHECK)		Check condition and Cannot lock
	 * (d) Return 1	(+LOCKED, +CHECK)	Successfully locked
	 */
	for (;;) {
		lv = lock->refs;
		if (lv == 0) {
			nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return(1);
			}
		} else {
			nlv = (lv + 1);
			if ((lv & ~HAMMER_REFS_FLAGS) == 0)
				nlv |= HAMMER_REFS_CHECK;
			if ((nlv & HAMMER_REFS_CHECK) == 0) {
				if (atomic_cmpset_int(&lock->refs, lv, nlv))
					return(0);
			} else if (lv & HAMMER_REFS_LOCKED) {
				/* CHECK also set here */
				if (atomic_cmpset_int(&lock->refs, lv, nlv))
					break;
			} else {
				/* CHECK also set here */
				nlv |= HAMMER_REFS_LOCKED;
				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
					lock->rowner = curthread;
					return(1);
				}
			}
		}
	}

	/*
	 * Deferred check condition because we were unable to acquire the
	 * lock.  We must block until the check condition is cleared due
	 * to a race with another thread, or we are able to acquire the
	 * lock.
	 *
	 * (a) Return 0	(-CHECK)		Another thread handled it
	 * (b) Return 1	(+LOCKED, +CHECK)	We handled it.
	 */
	for (;;) {
		lv = lock->refs;
		if ((lv & HAMMER_REFS_CHECK) == 0)
			return(0);
		if (lv & HAMMER_REFS_LOCKED) {
			tsleep_interlock(&lock->refs, 0);
			nlv = (lv | HAMMER_REFS_WANTED);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				tsleep(&lock->refs, PINTERLOCKED, "h1lk", 0);
		} else {
			/* CHECK also set here */
			nlv = lv | HAMMER_REFS_LOCKED;
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return(1);
			}
		}
	}
	/* not reached */
}

/*
 * This is the same as hammer_ref_interlock() but asserts that the
 * 0->1 transition is always true, thus the lock must have no references
 * on entry or have CHECK set, and will have one reference with the
 * interlock held on return.  It must also not be interlocked on entry
 * by anyone.
 *
 * NOTE that CHECK will never be found set when the ref-count is 0.
 *
 * 1 is always returned to match the API for hammer_ref_interlock().
 * This function returns with one ref, the lock held, and the CHECK bit set.
 */
int
hammer_ref_interlock_true(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;

		if (lv) {
			hpanic("bad lock %p %08x", lock, lock->refs);
		}
		nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
			lock->rowner = curthread;
			return (1);
		}
	}
}

/*
 * Unlock the interlock acquired by hammer_ref_interlock() and clear the
 * CHECK flag.  The ref-count remains unchanged.
 *
 * This routine is called in the load path when the load succeeds.
 */
void
hammer_ref_interlock_done(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		nlv = lv & ~HAMMER_REFS_FLAGS;
		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
			if (lv & HAMMER_REFS_WANTED)
				wakeup(&lock->refs);
			break;
		}
	}
}
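
/*
 * Editor's note -- usage sketch (not part of the original source): the
 * typical load-path pattern.  A return of 1 means this thread won the
 * interlock and is responsible for validating or loading the structure
 * and then clearing the interlock; a return of 0 means another thread
 * already did the work.
 *
 *	if (hammer_ref_interlock(&node->lock)) {
 *		... validate or load the underlying structure ...
 *		hammer_ref_interlock_done(&node->lock);
 *	}
 *	... structure is referenced and known-loaded here ...
 */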

/*
 * hammer_rel_interlock() works a bit differently in that it must
 * acquire the lock in tandem with a 1->0 transition.  CHECK is
 * not used.
 *
 * 1 is returned on 1->0 transitions with the lock held on return
 * and 0 is returned otherwise with the lock not held.
 *
 * It is important to note that the refs are not stable and may
 * increase while we hold the lock; the 1 indication only means
 * that we transitioned 1->0, not necessarily that we stayed at 0.
 *
 * Another thread bumping refs while we hold the lock will set CHECK,
 * causing one of the competing hammer_ref_interlock() calls to
 * return 1 after we release our lock.
 *
 * MPSAFE
 */
int
hammer_rel_interlock(struct hammer_lock *lock, int locked)
{
	u_int lv;
	u_int nlv;

	/*
	 * In locked mode (failure/unload path) we release the
	 * ref-count but leave it locked.
	 */
	if (locked) {
		hammer_rel(lock);
		return(1);
	}

	/*
	 * Integrated reference count drop with LOCKED, plus the hot-path
	 * returns.
	 */
	for (;;) {
		lv = lock->refs;

		if (lv == 1) {
			nlv = 0 | HAMMER_REFS_LOCKED;
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return(1);
			}
		} else if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
			if ((lv & HAMMER_REFS_LOCKED) == 0) {
				nlv = (lv - 1) | HAMMER_REFS_LOCKED;
				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
					lock->rowner = curthread;
					return(1);
				}
			} else {
				nlv = lv | HAMMER_REFS_WANTED;
				tsleep_interlock(&lock->refs, 0);
				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
					tsleep(&lock->refs, PINTERLOCKED,
					       "h0lk", 0);
				}
			}
		} else {
			nlv = (lv - 1);
			KKASSERT((int)nlv >= 0);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return(0);
		}
	}
	/* not reached */
}

/*
 * Unlock the interlock acquired by hammer_rel_interlock().
 *
 * If orig_locked is non-zero the interlock was originally held prior to
 * the hammer_rel_interlock() call and passed through to us.  In this
 * case we want to retain the CHECK error state if not transitioning
 * to 0.
 *
 * The code is the same either way so we do not have to conditionalize
 * on orig_locked.
 */
void
hammer_rel_interlock_done(struct hammer_lock *lock, int orig_locked __unused)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);
		if ((lv & ~HAMMER_REFS_FLAGS) == 0)
			nlv &= ~HAMMER_REFS_CHECK;
		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
			if (lv & HAMMER_REFS_WANTED)
				wakeup(&lock->refs);
			break;
		}
	}
}
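
/*
 * Editor's note -- usage sketch (not part of the original source): the
 * typical unload/teardown pattern on the last reference.  A return of 1
 * means this call performed the 1->0 transition and holds the interlock,
 * so it is responsible for the teardown and for releasing the interlock.
 *
 *	if (hammer_rel_interlock(&node->lock, 0)) {
 *		... tear down the underlying structure ...
 *		hammer_rel_interlock_done(&node->lock, 0);
 *	}
 */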

/*
 * Acquire the interlock on lock->refs.
 *
 * Return 1 if CHECK is currently set.  Note that CHECK will not
 * be set if the reference count is 0, but can get set if this function
 * is preceded by, say, hammer_ref(), or through races with other
 * threads.  The return value allows the caller to use the same logic
 * as hammer_ref_interlock().
 *
 * MPSAFE
 */
int
hammer_get_interlock(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		if (lv & HAMMER_REFS_LOCKED) {
			nlv = lv | HAMMER_REFS_WANTED;
			tsleep_interlock(&lock->refs, 0);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				tsleep(&lock->refs, PINTERLOCKED, "hilk", 0);
		} else {
			nlv = (lv | HAMMER_REFS_LOCKED);
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return((lv & HAMMER_REFS_CHECK) ? 1 : 0);
			}
		}
	}
}

/*
 * Attempt to acquire the interlock and expect 0 refs.  Used by the buffer
 * cache callback code to disassociate or lock the bufs related to HAMMER
 * structures.
 *
 * During teardown the related bp will be acquired by hammer_io_release()
 * which interlocks our test.
 *
 * Returns non-zero on success, zero on failure.
 */
int
hammer_try_interlock_norefs(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		if (lv == 0) {
			nlv = lv | HAMMER_REFS_LOCKED;
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return(1);
			}
		} else {
			return(0);
		}
	}
	/* not reached */
}

/*
 * Release the interlock on lock->refs.  This function will set
 * CHECK if the refs is non-zero and error is non-zero, and clear
 * CHECK otherwise.
 *
 * MPSAFE
 */
void
hammer_put_interlock(struct hammer_lock *lock, int error)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		KKASSERT(lv & HAMMER_REFS_LOCKED);
		nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);

		if ((nlv & ~HAMMER_REFS_FLAGS) == 0 || error == 0)
			nlv &= ~HAMMER_REFS_CHECK;
		else
			nlv |= HAMMER_REFS_CHECK;

		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
			if (lv & HAMMER_REFS_WANTED)
				wakeup(&lock->refs);
			return;
		}
	}
}
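
/*
 * Editor's note -- usage sketch (not part of the original source):
 * hammer_get_interlock() and hammer_put_interlock() bracket work whose
 * error status decides whether CHECK is left set for the next interlock
 * holder.  The validate_structure() helper below is hypothetical.
 *
 *	needs_check = hammer_get_interlock(&node->lock);
 *	error = needs_check ? validate_structure(node) : 0;
 *	hammer_put_interlock(&node->lock, error);
 */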

/*
 * The sync_lock must be held when doing any modifying operations on
 * meta-data.  It does not have to be held when modifying non-meta-data
 * buffers (backend or frontend).
 *
 * The flusher holds the lock exclusively while all other consumers hold it
 * shared.  All modifying operations made while holding the lock are atomic
 * in that they will be made part of the same flush group.
 *
 * Due to the atomicity requirement deadlock recovery code CANNOT release
 * the sync lock, nor can we give pending exclusive sync locks priority
 * over a shared sync lock as this could lead to a 3-way deadlock.
 */
void
hammer_sync_lock_ex(hammer_transaction_t trans)
{
	++trans->sync_lock_refs;
	hammer_lock_ex(&trans->hmp->sync_lock);
}

void
hammer_sync_lock_sh(hammer_transaction_t trans)
{
	++trans->sync_lock_refs;
	hammer_lock_sh(&trans->hmp->sync_lock);
}

int
hammer_sync_lock_sh_try(hammer_transaction_t trans)
{
	int error;

	++trans->sync_lock_refs;
	if ((error = hammer_lock_sh_try(&trans->hmp->sync_lock)) != 0)
		--trans->sync_lock_refs;
	return (error);
}

void
hammer_sync_unlock(hammer_transaction_t trans)
{
	--trans->sync_lock_refs;
	hammer_unlock(&trans->hmp->sync_lock);
}
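
/*
 * Editor's note -- usage sketch (not part of the original source): a
 * code path modifying meta-data brackets the modification with the shared
 * sync lock so the changes become part of a single flush group, while the
 * flusher takes the lock exclusively.
 *
 *	hammer_sync_lock_sh(trans);
 *	... modify meta-data buffers ...
 *	hammer_sync_unlock(trans);
 */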

/*
 * Misc
 */
uint32_t
hammer_to_unix_xid(hammer_uuid_t *uuid)
{
	return(*(uint32_t *)&uuid->node[2]);
}

void
hammer_guid_to_uuid(hammer_uuid_t *uuid, uint32_t guid)
{
	bzero(uuid, sizeof(*uuid));
	*(uint32_t *)&uuid->node[2] = guid;
}

void
hammer_time_to_timespec(uint64_t xtime, struct timespec *ts)
{
	ts->tv_sec = (unsigned long)(xtime / 1000000);
	ts->tv_nsec = (unsigned int)(xtime % 1000000) * 1000L;
}

uint64_t
hammer_timespec_to_time(struct timespec *ts)
{
	uint64_t xtime;

	xtime = (unsigned)(ts->tv_nsec / 1000) +
		(unsigned long)ts->tv_sec * 1000000ULL;
	return(xtime);
}
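
/*
 * Editor's note -- worked example (not part of the original source):
 * xtime is a microsecond count, so e.g. xtime = 1234567 converts to
 * tv_sec = 1, tv_nsec = 234567000, and converting that timespec back
 * yields 1234567 again (sub-microsecond precision is dropped).
 */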


/*
 * Convert a HAMMER filesystem object type to a vnode type
 */
enum vtype
hammer_get_vnode_type(uint8_t obj_type)
{
	switch(obj_type) {
	case HAMMER_OBJTYPE_DIRECTORY:
		return(VDIR);
	case HAMMER_OBJTYPE_REGFILE:
		return(VREG);
	case HAMMER_OBJTYPE_DBFILE:
		return(VDATABASE);
	case HAMMER_OBJTYPE_FIFO:
		return(VFIFO);
	case HAMMER_OBJTYPE_SOCKET:
		return(VSOCK);
	case HAMMER_OBJTYPE_CDEV:
		return(VCHR);
	case HAMMER_OBJTYPE_BDEV:
		return(VBLK);
	case HAMMER_OBJTYPE_SOFTLINK:
		return(VLNK);
	default:
		return(VBAD);
	}
	/* not reached */
}

int
hammer_get_dtype(uint8_t obj_type)
{
	switch(obj_type) {
	case HAMMER_OBJTYPE_DIRECTORY:
		return(DT_DIR);
	case HAMMER_OBJTYPE_REGFILE:
		return(DT_REG);
	case HAMMER_OBJTYPE_DBFILE:
		return(DT_DBF);
	case HAMMER_OBJTYPE_FIFO:
		return(DT_FIFO);
	case HAMMER_OBJTYPE_SOCKET:
		return(DT_SOCK);
	case HAMMER_OBJTYPE_CDEV:
		return(DT_CHR);
	case HAMMER_OBJTYPE_BDEV:
		return(DT_BLK);
	case HAMMER_OBJTYPE_SOFTLINK:
		return(DT_LNK);
	default:
		return(DT_UNKNOWN);
	}
	/* not reached */
}

uint8_t
hammer_get_obj_type(enum vtype vtype)
{
	switch(vtype) {
	case VDIR:
		return(HAMMER_OBJTYPE_DIRECTORY);
	case VREG:
		return(HAMMER_OBJTYPE_REGFILE);
	case VDATABASE:
		return(HAMMER_OBJTYPE_DBFILE);
	case VFIFO:
		return(HAMMER_OBJTYPE_FIFO);
	case VSOCK:
		return(HAMMER_OBJTYPE_SOCKET);
	case VCHR:
		return(HAMMER_OBJTYPE_CDEV);
	case VBLK:
		return(HAMMER_OBJTYPE_BDEV);
	case VLNK:
		return(HAMMER_OBJTYPE_SOFTLINK);
	default:
		return(HAMMER_OBJTYPE_UNKNOWN);
	}
	/* not reached */
}

/*
 * Return flags for hammer_delete_at_cursor()
 */
int
hammer_nohistory(hammer_inode_t ip)
{
	if (ip->hmp->hflags & HMNT_NOHISTORY)
		return(HAMMER_DELETE_DESTROY);
	if (ip->ino_data.uflags & (SF_NOHISTORY|UF_NOHISTORY))
		return(HAMMER_DELETE_DESTROY);
	return(0);
}

/*
 * ALGORITHM VERSION 0:
 * Return a namekey hash.  The 64 bit namekey hash consists of a 32 bit
 * crc in the MSB and 0 in the LSB.  The caller will use the low 32 bits
 * to generate a unique key and will scan all entries with the same upper
 * 32 bits when issuing a lookup.
 *
 *	0hhhhhhhhhhhhhhh hhhhhhhhhhhhhhhh 0000000000000000 0000000000000000
 *
 * ALGORITHM VERSION 1:
 *
 * This algorithm breaks the filename down into separate 32-bit crcs,
 * one for each filename segment separated by a special character (dot,
 * dash, underscore, or tilde).  The CRCs are then added together.
 * This allows temporary names.  A full-filename 16 bit crc is also
 * generated to deal with degenerate conditions.
 *
 * The algorithm is designed to handle create/rename situations such
 * that a create with an extension followed by a rename without the
 * extension only shifts the key space rather than randomizing it.
 *
 * NOTE: The inode allocator cache can only match 10 bits so we do
 *	 not really have any room for a partial sorted name, and
 *	 numbers don't sort well in that situation anyway.
 *
 *	0mmmmmmmmmmmmmmm mmmmmmmmmmmmmmmm llllllllllllllll 0000000000000000
 *
 *
 * We strip bit 63 in order to provide a positive key; this way a seek
 * offset of 0 will represent the base of the directory.
 *
 * We usually strip bit 0 (set it to 0) in order to provide a consistent
 * iteration space for collisions.
 *
 * This function can never return 0.  We use the MSB-0 space to synthesize
 * artificial directory entries such as "." and "..".
 */
int64_t
hammer_direntry_namekey(hammer_inode_t dip, const void *name, int len,
			uint32_t *max_iterationsp)
{
	const char *aname = name;
	int32_t crcx;
	int64_t key;
	int i;
	int j;

	switch (dip->ino_data.cap_flags & HAMMER_INODE_CAP_DIRHASH_MASK) {
	case HAMMER_INODE_CAP_DIRHASH_ALG0:
		/*
		 * Original algorithm
		 */
		key = (int64_t)(crc32(aname, len) & 0x7FFFFFFF) << 32;
		if (key == 0)
			key |= 0x100000000LL;
		*max_iterationsp = 0xFFFFFFFFU;
		break;
	case HAMMER_INODE_CAP_DIRHASH_ALG1:
		/*
		 * Filesystem version 6 or better will create directories
		 * using the ALG1 dirhash.  This hash breaks the filename
		 * up into domains separated by special characters and
		 * hashes each domain independently.
		 *
		 * We also do a simple sub-sort using the first character
		 * of the filename in the top 5-bits.
		 */
		key = 0;

		/*
		 * m32
		 */
		crcx = 0;
		for (i = j = 0; i < len; ++i) {
			if (aname[i] == '.' ||
			    aname[i] == '-' ||
			    aname[i] == '_' ||
			    aname[i] == '~') {
				if (i != j)
					crcx += crc32(aname + j, i - j);
				j = i + 1;
			}
		}
		if (i != j)
			crcx += crc32(aname + j, i - j);

#if 0
		/*
		 * xor top 5 bits 0mmmm into low bits and steal the top 5
		 * bits as a semi sub sort using the first character of
		 * the filename.  bit 63 is always left as 0 so directory
		 * keys are positive numbers.
		 */
		crcx ^= (uint32_t)crcx >> (32 - 5);
		crcx = (crcx & 0x07FFFFFF) | ((aname[0] & 0x0F) << (32 - 5));
#endif
		crcx &= 0x7FFFFFFFU;

		key |= (uint64_t)crcx << 32;

		/*
		 * l16 - crc of entire filename
		 *
		 * This crc reduces degenerate hash collision conditions
		 */
		crcx = crc32(aname, len);
		crcx = crcx ^ (crcx << 16);
		key |= crcx & 0xFFFF0000U;

		/*
		 * Cleanup
		 */
		if ((key & 0xFFFFFFFF00000000LL) == 0)
			key |= 0x100000000LL;
		if (hammer_debug_general & 0x0400) {
			hdkprintf("0x%016jx %*.*s\n",
				  (intmax_t)key, len, len, aname);
		}
		*max_iterationsp = 0x00FFFFFF;
		break;
	case HAMMER_INODE_CAP_DIRHASH_ALG2:
	case HAMMER_INODE_CAP_DIRHASH_ALG3:
	default:
		key = 0;			/* compiler warning */
		*max_iterationsp = 1;		/* sanity */
		hpanic("bad algorithm %p", dip);
		break;
	}
	return(key);
}
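
/*
 * Editor's note -- usage sketch (not part of the original source): a
 * directory-entry add or lookup computes the base key from the filename,
 * then probes up to *max_iterationsp low-order iterator values under that
 * key to resolve hash collisions.
 *
 *	int64_t key;
 *	uint32_t max_iterations;
 *
 *	key = hammer_direntry_namekey(dip, name, len, &max_iterations);
 *	... scan B-Tree entries sharing the base key, bumping the
 *	    iterator portion up to max_iterations times ...
 */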

/*
 * Convert string after @@ (@@ not included) to TID.  Returns 0 on success,
 * EINVAL on failure.
 *
 * If this function fails *ispfsp, *tidp, and *localizationp will not
 * be modified.
 */
int
hammer_str_to_tid(const char *str, int *ispfsp,
		  hammer_tid_t *tidp, uint32_t *localizationp)
{
	hammer_tid_t tid;
	uint32_t localization;
	char *ptr;
	int ispfs;
	int n;

	/*
	 * Forms allowed for TID:  "0x%016llx"
	 *			   "-1"
	 */
	tid = strtouq(str, &ptr, 0);
	n = ptr - str;
	if (n == 2 && str[0] == '-' && str[1] == '1') {
		/* ok */
	} else if (n == 18 && str[0] == '0' && (str[1] | 0x20) == 'x') {
		/* ok */
	} else {
		return(EINVAL);
	}

	/*
	 * Forms allowed for PFS:  ":%05d"  (i.e. "...:0" would be illegal).
	 */
	str = ptr;
	if (*str == ':') {
		localization = pfs_to_lo(strtoul(str + 1, &ptr, 10));
		if (ptr - str != 6)
			return(EINVAL);
		str = ptr;
		ispfs = 1;
	} else {
		localization = *localizationp;
		ispfs = 0;
	}

	/*
	 * Any trailing junk invalidates special extension handling.
	 */
	if (*str)
		return(EINVAL);
	*tidp = tid;
	*localizationp = localization;
	*ispfsp = ispfs;
	return(0);
}
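
/*
 * Editor's note -- examples (not part of the original source) of strings
 * accepted by hammer_str_to_tid(), i.e. the text following "@@":
 *
 *	"-1"				wildcard TID, PFS taken from caller
 *	"0x0000000100000000"		explicit 16-digit hex TID
 *	"0x0000000100000000:00001"	explicit TID plus 5-digit PFS id
 *
 * Anything else, including trailing junk, returns EINVAL.
 */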

/*
 * Return the block size at the specified file offset.
 */
int
hammer_blocksize(int64_t file_offset)
{
	if (file_offset < HAMMER_XDEMARC)
		return(HAMMER_BUFSIZE);
	else
		return(HAMMER_XBUFSIZE);
}

int
hammer_blockoff(int64_t file_offset)
{
	if (file_offset < HAMMER_XDEMARC)
		return((int)file_offset & HAMMER_BUFMASK);
	else
		return((int)file_offset & HAMMER_XBUFMASK);
}

/*
 * Return the demarcation point between the two offsets where
 * the block size changes.
 */
int64_t
hammer_blockdemarc(int64_t file_offset1, int64_t file_offset2)
{
	if (file_offset1 < HAMMER_XDEMARC) {
		if (file_offset2 <= HAMMER_XDEMARC)
			return(file_offset2);
		return(HAMMER_XDEMARC);
	}
	hpanic("illegal range %jd %jd",
	       (intmax_t)file_offset1, (intmax_t)file_offset2);
}
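
/*
 * Editor's note -- usage sketch (not part of the original source): an
 * I/O loop can use these helpers to walk a file range in buffer-sized
 * pieces; assuming (as in HAMMER) that HAMMER_XDEMARC is aligned to both
 * buffer sizes, no piece straddles the small/large demarcation point.
 * hammer_blockdemarc() can additionally clamp a range that starts below
 * HAMMER_XDEMARC so it does not cross it.
 *
 *	while (offset < end) {
 *		int blksize = hammer_blocksize(offset);
 *		int blkoff = hammer_blockoff(offset);
 *		int n = blksize - blkoff;
 *
 *		if (offset + n > end)
 *			n = (int)(end - offset);
 *		... perform I/O on [offset, offset + n) ...
 *		offset += n;
 *	}
 */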

dev_t
hammer_fsid_to_udev(hammer_uuid_t *uuid)
{
	uint32_t crc;

	crc = crc32(uuid, sizeof(*uuid));
	return((dev_t)crc);
}