xref: /dragonfly/sys/vfs/hammer/hammer_subs.c (revision a563ca70)
1 /*
2  * Copyright (c) 2007-2011 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.35 2008/10/15 22:38:37 dillon Exp $
35  */
36 /*
37  * HAMMER structural locking
38  */
39 
40 #include "hammer.h"
41 #include <sys/dirent.h>
42 
43 void
44 hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident)
45 {
46 	thread_t td = curthread;
47 	u_int lv;
48 	u_int nlv;
49 
50 	KKASSERT(lock->refs);
51 	for (;;) {
52 		lv = lock->lockval;
53 
54 		if (lv == 0) {
55 			nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
56 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
57 				lock->lowner = td;
58 				break;
59 			}
60 		} else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
61 			   lock->lowner == td) {
62 			nlv = (lv + 1);
63 			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
64 				break;
65 		} else {
66 			if (hammer_debug_locks) {
67 				kprintf("hammer_lock_ex: held by %p\n",
68 					lock->lowner);
69 			}
70 			nlv = lv | HAMMER_LOCKF_WANTED;
71 			++hammer_contention_count;
72 			tsleep_interlock(&lock->lockval, 0);
73 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
74 				tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
75 				if (hammer_debug_locks)
76 					kprintf("hammer_lock_ex: try again\n");
77 			}
78 		}
79 	}
80 }
81 
82 /*
83  * Try to obtain an exclusive lock
84  */
85 int
86 hammer_lock_ex_try(struct hammer_lock *lock)
87 {
88 	thread_t td = curthread;
89 	int error;
90 	u_int lv;
91 	u_int nlv;
92 
93 	KKASSERT(lock->refs);
94 	for (;;) {
95 		lv = lock->lockval;
96 
97 		if (lv == 0) {
98 			nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
99 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
100 				lock->lowner = td;
101 				error = 0;
102 				break;
103 			}
104 		} else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
105 			   lock->lowner == td) {
106 			nlv = (lv + 1);
107 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
108 				error = 0;
109 				break;
110 			}
111 		} else {
112 			error = EAGAIN;
113 			break;
114 		}
115 	}
116 	return (error);
117 }
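
/*
 * Illustrative sketch (not part of the original source): the basic
 * exclusive-lock pattern used with the functions above.  A structural
 * reference must be held around the lock/unlock pair -- the
 * KKASSERT(lock->refs) checks depend on it.  The ident string is
 * arbitrary (it is only used as the tsleep wmesg).
 */
#if 0
static void
example_exclusive_section(struct hammer_lock *lockp)
{
	hammer_ref(lockp);			/* structural reference */
	hammer_lock_ex_ident(lockp, "exmpl");	/* blocking exclusive acquire */
	/* ... modify the state protected by the lock ... */
	hammer_unlock(lockp);
	hammer_rel(lockp);			/* drop structural reference */
}
#endif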
118 
119 /*
120  * Obtain a shared lock
121  *
122  * We do not give pending exclusive locks priority over shared locks as
123  * doing so could lead to a deadlock.
124  */
125 void
126 hammer_lock_sh(struct hammer_lock *lock)
127 {
128 	thread_t td = curthread;
129 	u_int lv;
130 	u_int nlv;
131 	const char *ident = "hmrlck";
132 
133 	KKASSERT(lock->refs);
134 	for (;;) {
135 		lv = lock->lockval;
136 
137 		if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
138 			nlv = (lv + 1);
139 			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
140 				break;
141 		} else if (lock->lowner == td) {
142 			/*
143 			 * Disallowed case, drop into the kernel debugger for
144 			 * now.  A 'cont' continues with the exclusive lock.
145 			 */
146 			nlv = (lv + 1);
147 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
148 				if (hammer_debug_critical)
149 					Debugger("hammer_lock_sh: holding ex");
150 				break;
151 			}
152 		} else {
153 			nlv = lv | HAMMER_LOCKF_WANTED;
154 			++hammer_contention_count;
155 			tsleep_interlock(&lock->lockval, 0);
156 			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
157 				tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
158 		}
159 	}
160 }
161 
162 int
163 hammer_lock_sh_try(struct hammer_lock *lock)
164 {
165 	thread_t td = curthread;
166 	u_int lv;
167 	u_int nlv;
168 	int error;
169 
170 	KKASSERT(lock->refs);
171 	for (;;) {
172 		lv = lock->lockval;
173 
174 		if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
175 			nlv = (lv + 1);
176 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
177 				error = 0;
178 				break;
179 			}
180 		} else if (lock->lowner == td) {
181 			/*
182 			 * Disallowed case, drop into the kernel debugger for
183 			 * now.  A 'cont' continues with the exclusive lock.
184 			 */
185 			nlv = (lv + 1);
186 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
187 				if (hammer_debug_critical)
188 					Debugger("hammer_lock_sh: holding ex");
189 				error = 0;
190 				break;
191 			}
192 		} else {
193 			error = EAGAIN;
194 			break;
195 		}
196 	}
197 	return (error);
198 }
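
/*
 * Illustrative sketch (not part of the original source): lockval packs a
 * hold count in its low bits together with the HAMMER_LOCKF_EXCLUSIVE and
 * HAMMER_LOCKF_WANTED flag bits.  The count represents shared holders when
 * EXCLUSIVE is clear and recursive exclusive acquisitions (by lowner) when
 * it is set, which is why the functions above mask the flags off before
 * comparing counts.
 */
#if 0
static u_int
example_lock_count(struct hammer_lock *lockp)
{
	/* same mask the lock/unlock paths use above */
	return (lockp->lockval & ~(HAMMER_LOCKF_EXCLUSIVE |
				   HAMMER_LOCKF_WANTED));
}
#endif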
199 
200 /*
201  * Upgrade a shared lock to an exclusively held lock.  This function will
202  * return EDEADLK if there is more than one shared holder.
203  *
204  * No error occurs and no action is taken if the lock is already exclusively
205  * held by the caller.  If the lock is not held at all or held exclusively
206  * by someone else, this function will panic.
207  */
208 int
209 hammer_lock_upgrade(struct hammer_lock *lock, int shcount)
210 {
211 	thread_t td = curthread;
212 	u_int lv;
213 	u_int nlv;
214 	int error;
215 
216 	for (;;) {
217 		lv = lock->lockval;
218 
219 		if ((lv & ~HAMMER_LOCKF_WANTED) == shcount) {
220 			nlv = lv | HAMMER_LOCKF_EXCLUSIVE;
221 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
222 				lock->lowner = td;
223 				error = 0;
224 				break;
225 			}
226 		} else if (lv & HAMMER_LOCKF_EXCLUSIVE) {
227 			if (lock->lowner != curthread)
228 				panic("hammer_lock_upgrade: illegal state");
229 			error = 0;
230 			break;
231 		} else if ((lv & ~HAMMER_LOCKF_WANTED) == 0) {
232 			panic("hammer_lock_upgrade: lock is not held");
233 			/* NOT REACHED */
234 			error = EDEADLK;
235 			break;
236 		} else {
237 			error = EDEADLK;
238 			break;
239 		}
240 	}
241 	return (error);
242 }
243 
244 /*
245  * Downgrade an exclusively held lock to a shared lock.
246  */
247 void
248 hammer_lock_downgrade(struct hammer_lock *lock, int shcount)
249 {
250 	thread_t td __debugvar = curthread;
251 	u_int lv;
252 	u_int nlv;
253 
254 	KKASSERT((lock->lockval & ~HAMMER_LOCKF_WANTED) ==
255 		 (HAMMER_LOCKF_EXCLUSIVE | shcount));
256 	KKASSERT(lock->lowner == td);
257 
258 	/*
259 	 * NOTE: Must clear owner before releasing exclusivity
260 	 */
261 	lock->lowner = NULL;
262 
263 	for (;;) {
264 		lv = lock->lockval;
265 		nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
266 		if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
267 			if (lv & HAMMER_LOCKF_WANTED)
268 				wakeup(&lock->lockval);
269 			break;
270 		}
271 	}
272 }
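
/*
 * Illustrative sketch (not part of the original source): one way a caller
 * holding a single shared lock can react to EDEADLK from
 * hammer_lock_upgrade() is to drop the shared lock, reacquire it
 * exclusively, and revalidate whatever the lock protects, since other
 * threads may have run in the window.  The shcount of 1 is an assumption
 * of this sketch.
 */
#if 0
static void
example_upgrade_section(struct hammer_lock *lockp)
{
	if (hammer_lock_upgrade(lockp, 1) == EDEADLK) {
		hammer_unlock(lockp);			/* drop shared */
		hammer_lock_ex_ident(lockp, "exupg");	/* take exclusive */
		/* revalidate the protected state here */
	}
	/* ... exclusive work ... */
	hammer_lock_downgrade(lockp, 1);	/* back to one shared hold */
}
#endif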
273 
274 void
275 hammer_unlock(struct hammer_lock *lock)
276 {
277 	thread_t td __debugvar = curthread;
278 	u_int lv;
279 	u_int nlv;
280 
281 	lv = lock->lockval;
282 	KKASSERT(lv != 0);
283 	if (lv & HAMMER_LOCKF_EXCLUSIVE)
284 		KKASSERT(lock->lowner == td);
285 
286 	for (;;) {
287 		lv = lock->lockval;
288 		nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
289 		if (nlv > 1) {
290 			nlv = lv - 1;
291 			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
292 				break;
293 		} else if (nlv == 1) {
294 			nlv = 0;
295 			if (lv & HAMMER_LOCKF_EXCLUSIVE)
296 				lock->lowner = NULL;
297 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
298 				if (lv & HAMMER_LOCKF_WANTED)
299 					wakeup(&lock->lockval);
300 				break;
301 			}
302 		} else {
303 			panic("hammer_unlock: lock %p is not held", lock);
304 		}
305 	}
306 }
307 
308 /*
309  * The calling thread must be holding a shared or exclusive lock.
310  * Returns < 0 if lock is held shared, and > 0 if held exclusively.
311  */
312 int
313 hammer_lock_status(struct hammer_lock *lock)
314 {
315 	u_int lv = lock->lockval;
316 
317 	if (lv & HAMMER_LOCKF_EXCLUSIVE)
318 		return(1);
319 	else if (lv)
320 		return(-1);
321 	panic("hammer_lock_status: lock must be held: %p", lock);
322 }
323 
324 /*
325  * Bump the ref count for a lock (not the excl/share count, but a separate
326  * structural reference count).  The CHECK flag will be set on a 0->1
327  * transition.
328  *
329  * This function does nothing to serialize races between multiple threads.
330  * The caller can interlock it later on to deal with serialization.
331  *
332  * MPSAFE
333  */
334 void
335 hammer_ref(struct hammer_lock *lock)
336 {
337 	u_int lv;
338 	u_int nlv;
339 
340 	for (;;) {
341 		lv = lock->refs;
342 		if ((lv & ~HAMMER_REFS_FLAGS) == 0) {
343 			nlv = (lv + 1) | HAMMER_REFS_CHECK;
344 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
345 				return;
346 		} else {
347 			nlv = (lv + 1);
348 			KKASSERT((int)nlv > 0);
349 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
350 				return;
351 		}
352 	}
353 	/* not reached */
354 }
355 
356 /*
357  * Drop the ref count for a lock (not the excl/share count, but a separate
358  * structural reference count).  The CHECK flag will be cleared on a 1->0
359  * transition.
360  *
361  * This function does nothing to serialize races between multiple threads.
362  *
363  * MPSAFE
364  */
365 void
366 hammer_rel(struct hammer_lock *lock)
367 {
368 	u_int lv;
369 	u_int nlv;
370 
371 	for (;;) {
372 		lv = lock->refs;
373 		if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
374 			nlv = (lv - 1) & ~HAMMER_REFS_CHECK;
375 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
376 				return;
377 		} else {
378 			KKASSERT((int)lv > 0);
379 			nlv = (lv - 1);
380 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
381 				return;
382 		}
383 	}
384 	/* not reached */
385 }
386 
387 /*
388  * The hammer_*_interlock() and hammer_*_interlock_done() functions are
389  * more sophisticated versions which handle MP transition races and block
390  * when necessary.
391  *
392  * hammer_ref_interlock() bumps the ref-count and conditionally acquires
393  * the interlock for 0->1 transitions or if the CHECK is found to be set.
394  *
395  * This case will return TRUE, the interlock will be held, and the CHECK
396  * bit also set.  Other threads attempting to ref will see the CHECK bit
397  * and block until we clean up.
398  *
399  * FALSE is returned for transitions other than 0->1 when the CHECK bit
400  * is not found to be set, or if the function loses the race with another
401  * thread.
402  *
403  * TRUE is only returned to one thread and the others will block.
404  * Effectively a TRUE indicator means 'someone transitioned 0->1
405  * and you are the first guy to successfully lock it after that, so you
406  * need to check'.  Due to races the ref-count may be greater than 1 upon
407  * return.
408  *
409  * MPSAFE
410  */
411 int
412 hammer_ref_interlock(struct hammer_lock *lock)
413 {
414 	u_int lv;
415 	u_int nlv;
416 
417 	/*
418 	 * Integrated reference count bump, lock, and check, with hot-path.
419 	 *
420 	 * (a) Return 1	(+LOCKED, +CHECK)	0->1 transition
421 	 * (b) Return 0 (-LOCKED, -CHECK)	N->N+1 transition
422 	 * (c) Break out (+CHECK)		Check condition set, cannot lock
423 	 * (d) Return 1 (+LOCKED, +CHECK)	Successfully locked
424 	 */
425 	for (;;) {
426 		lv = lock->refs;
427 		if (lv == 0) {
428 			nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
429 			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
430 				lock->rowner = curthread;
431 				return(1);
432 			}
433 		} else {
434 			nlv = (lv + 1);
435 			if ((lv & ~HAMMER_REFS_FLAGS) == 0)
436 				nlv |= HAMMER_REFS_CHECK;
437 			if ((nlv & HAMMER_REFS_CHECK) == 0) {
438 				if (atomic_cmpset_int(&lock->refs, lv, nlv))
439 					return(0);
440 			} else if (lv & HAMMER_REFS_LOCKED) {
441 				/* CHECK also set here */
442 				if (atomic_cmpset_int(&lock->refs, lv, nlv))
443 					break;
444 			} else {
445 				/* CHECK also set here */
446 				nlv |= HAMMER_REFS_LOCKED;
447 				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
448 					lock->rowner = curthread;
449 					return(1);
450 				}
451 			}
452 		}
453 	}
454 
455 	/*
456 	 * Deferred check condition because we were unable to acquire the
457 	 * lock.  We must block until the check condition is cleared due
458 	 * to a race with another thread, or we are able to acquire the
459 	 * lock.
460 	 *
461 	 * (a) Return 0	(-CHECK)		Another thread handled it
462 	 * (b) Return 1 (+LOCKED, +CHECK)	We handled it.
463 	 */
464 	for (;;) {
465 		lv = lock->refs;
466 		if ((lv & HAMMER_REFS_CHECK) == 0)
467 			return(0);
468 		if (lv & HAMMER_REFS_LOCKED) {
469 			tsleep_interlock(&lock->refs, 0);
470 			nlv = (lv | HAMMER_REFS_WANTED);
471 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
472 				tsleep(&lock->refs, PINTERLOCKED, "h1lk", 0);
473 		} else {
474 			/* CHECK also set here */
475 			nlv = lv | HAMMER_REFS_LOCKED;
476 			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
477 				lock->rowner = curthread;
478 				return(1);
479 			}
480 		}
481 	}
482 	/* not reached */
483 }
484 
485 /*
486  * This is the same as hammer_ref_interlock() but asserts that the
487  * 0->1 transition is always true, thus the lock must have no references
488  * on entry or have CHECK set, and will have one reference with the
489  * interlock held on return.  It must also not be interlocked on entry
490  * by anyone.
491  *
492  * NOTE that CHECK will never be found set when the ref-count is 0.
493  *
494  * TRUE is always returned to match the API for hammer_ref_interlock().
495  * This function returns with one ref, the lock held, and the CHECK bit set.
496  */
497 int
498 hammer_ref_interlock_true(struct hammer_lock *lock)
499 {
500 	u_int lv;
501 	u_int nlv;
502 
503 	for (;;) {
504 		lv = lock->refs;
505 
506 		if (lv) {
507 			panic("hammer_ref_interlock_true: bad lock %p %08x\n",
508 			      lock, lock->refs);
509 		}
510 		nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
511 		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
512 			lock->rowner = curthread;
513 			return (1);
514 		}
515 	}
516 }
517 
518 /*
519  * Unlock the interlock acquired by hammer_ref_interlock() and clear the
520  * CHECK flag.  The ref-count remains unchanged.
521  *
522  * This routine is called in the load path when the load succeeds.
523  */
524 void
525 hammer_ref_interlock_done(struct hammer_lock *lock)
526 {
527 	u_int lv;
528 	u_int nlv;
529 
530 	for (;;) {
531 		lv = lock->refs;
532 		nlv = lv & ~HAMMER_REFS_FLAGS;
533 		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
534 			if (lv & HAMMER_REFS_WANTED)
535 				wakeup(&lock->refs);
536 			break;
537 		}
538 	}
539 }
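
/*
 * Illustrative sketch (not part of the original source): the load-path
 * pattern the two functions above implement.  A TRUE return from
 * hammer_ref_interlock() means this thread won the 0->1 (or CHECK) race
 * and is responsible for loading/validating the structure before calling
 * hammer_ref_interlock_done(); other threads referencing the structure
 * block until then.
 */
#if 0
static void
example_get_structure(struct hammer_lock *lockp)
{
	if (hammer_ref_interlock(lockp)) {
		/* interlock held, CHECK set: (re)load the backing data */
		hammer_ref_interlock_done(lockp);	/* load succeeded */
	}
	/* the structure is now referenced and known to be loaded */
}
#endif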
540 
541 /*
542  * hammer_rel_interlock() works a bit differently in that it must
543  * acquire the lock in tandem with a 1->0 transition.  CHECK is
544  * not used.
545  *
546  * TRUE is returned on 1->0 transitions with the lock held on return
547  * and FALSE is returned otherwise with the lock not held.
548  *
549  * It is important to note that the refs are not stable and may
550  * increase while we hold the lock, the TRUE indication only means
551  * that we transitioned 1->0, not necessarily that we stayed at 0.
552  *
553  * Another thread bumping refs while we hold the lock will set CHECK,
554  * causing one of the competing hammer_ref_interlock() calls to
555  * return TRUE after we release our lock.
556  *
557  * MPSAFE
558  */
559 int
560 hammer_rel_interlock(struct hammer_lock *lock, int locked)
561 {
562 	u_int lv;
563 	u_int nlv;
564 
565 	/*
566 	 * In locked mode (failure/unload path) we release the
567 	 * ref-count but leave it locked.
568 	 */
569 	if (locked) {
570 		hammer_rel(lock);
571 		return(1);
572 	}
573 
574 	/*
575 	 * Integrated reference count drop with LOCKED, plus the hot-path
576 	 * returns.
577 	 */
578 	for (;;) {
579 		lv = lock->refs;
580 
581 		if (lv == 1) {
582 			nlv = 0 | HAMMER_REFS_LOCKED;
583 			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
584 				lock->rowner = curthread;
585 				return(1);
586 			}
587 		} else if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
588 			if ((lv & HAMMER_REFS_LOCKED) == 0) {
589 				nlv = (lv - 1) | HAMMER_REFS_LOCKED;
590 				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
591 					lock->rowner = curthread;
592 					return(1);
593 				}
594 			} else {
595 				nlv = lv | HAMMER_REFS_WANTED;
596 				tsleep_interlock(&lock->refs, 0);
597 				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
598 					tsleep(&lock->refs, PINTERLOCKED,
599 					       "h0lk", 0);
600 				}
601 			}
602 		} else {
603 			nlv = (lv - 1);
604 			KKASSERT((int)nlv >= 0);
605 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
606 				return(0);
607 		}
608 	}
609 	/* not reached */
610 }
611 
612 /*
613  * Unlock the interlock acquired by hammer_rel_interlock().
614  *
615  * If orig_locked is non-zero the interlock was originally held prior to
616  * the hammer_rel_interlock() call and passed through to us.  In this
617  * case we want to retain the CHECK error state if not transitioning
618  * to 0.
619  *
620  * The code is the same either way so we do not have to conditionalize
621  * on orig_locked.
622  */
623 void
624 hammer_rel_interlock_done(struct hammer_lock *lock, int orig_locked __unused)
625 {
626 	u_int lv;
627 	u_int nlv;
628 
629 	for (;;) {
630 		lv = lock->refs;
631 		nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);
632 		if ((lv & ~HAMMER_REFS_FLAGS) == 0)
633 			nlv &= ~HAMMER_REFS_CHECK;
634 		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
635 			if (lv & HAMMER_REFS_WANTED)
636 				wakeup(&lock->refs);
637 			break;
638 		}
639 	}
640 }
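
/*
 * Illustrative sketch (not part of the original source): the matching
 * teardown pattern.  A TRUE return from hammer_rel_interlock() means this
 * thread saw the 1->0 transition with the interlock held, but as noted
 * above the ref count is not stable, so a real caller re-checks it before
 * destroying anything.
 */
#if 0
static void
example_put_structure(struct hammer_lock *lockp)
{
	if (hammer_rel_interlock(lockp, 0)) {
		if ((lockp->refs & ~HAMMER_REFS_FLAGS) == 0) {
			/* still unreferenced: safe to tear down */
		}
		hammer_rel_interlock_done(lockp, 0);
	}
}
#endif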
641 
642 /*
643  * Acquire the interlock on lock->refs.
644  *
645  * Return TRUE if CHECK is currently set.  Note that CHECK will not
646  * be set if the reference count is 0, but can get set if this function
647  * is preceded by, say, hammer_ref(), or through races with other
648  * threads.  The return value allows the caller to use the same logic
649  * as hammer_ref_interlock().
650  *
651  * MPSAFE
652  */
653 int
654 hammer_get_interlock(struct hammer_lock *lock)
655 {
656 	u_int lv;
657 	u_int nlv;
658 
659 	for (;;) {
660 		lv = lock->refs;
661 		if (lv & HAMMER_REFS_LOCKED) {
662 			nlv = lv | HAMMER_REFS_WANTED;
663 			tsleep_interlock(&lock->refs, 0);
664 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
665 				tsleep(&lock->refs, PINTERLOCKED, "hilk", 0);
666 		} else {
667 			nlv = (lv | HAMMER_REFS_LOCKED);
668 			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
669 				lock->rowner = curthread;
670 				return((lv & HAMMER_REFS_CHECK) ? 1 : 0);
671 			}
672 		}
673 	}
674 }
675 
676 /*
677  * Attempt to acquire the interlock and expect 0 refs.  Used by the buffer
678  * cache callback code to disassociate or lock the bufs related to HAMMER
679  * structures.
680  *
681  * During teardown the related bp will be acquired by hammer_io_release()
682  * which interlocks our test.
683  *
684  * Returns non-zero on success, zero on failure.
685  */
686 int
687 hammer_try_interlock_norefs(struct hammer_lock *lock)
688 {
689 	u_int lv;
690 	u_int nlv;
691 
692 	for (;;) {
693 		lv = lock->refs;
694 		if (lv == 0) {
695 			nlv = lv | HAMMER_REFS_LOCKED;
696 			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
697 				lock->rowner = curthread;
698 				return(1);
699 			}
700 		} else {
701 			return(0);
702 		}
703 	}
704 	/* not reached */
705 }
706 
707 /*
708  * Release the interlock on lock->refs.  This function will set
709  * CHECK if the refs is non-zero and error is non-zero, and clear
710  * CHECK otherwise.
711  *
712  * MPSAFE
713  */
714 void
715 hammer_put_interlock(struct hammer_lock *lock, int error)
716 {
717 	u_int lv;
718 	u_int nlv;
719 
720 	for (;;) {
721 		lv = lock->refs;
722 		KKASSERT(lv & HAMMER_REFS_LOCKED);
723 		nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);
724 
725 		if ((nlv & ~HAMMER_REFS_FLAGS) == 0 || error == 0)
726 			nlv &= ~HAMMER_REFS_CHECK;
727 		else
728 			nlv |= HAMMER_REFS_CHECK;
729 
730 		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
731 			if (lv & HAMMER_REFS_WANTED)
732 				wakeup(&lock->refs);
733 			return;
734 		}
735 	}
736 }
737 
738 /*
739  * The sync_lock must be held when doing any modifying operations on
740  * meta-data.  It does not have to be held when modifying non-meta-data buffers
741  * (backend or frontend).
742  *
743  * The flusher holds the lock exclusively while all other consumers hold it
744  * shared.  All modifying operations made while holding the lock are atomic
745  * in that they will be made part of the same flush group.
746  *
747  * Due to the atomicity requirement, deadlock recovery code CANNOT release the
748  * sync lock, nor can we give pending exclusive sync locks priority over
749  * a shared sync lock as this could lead to a 3-way deadlock.
750  */
751 void
752 hammer_sync_lock_ex(hammer_transaction_t trans)
753 {
754 	++trans->sync_lock_refs;
755 	hammer_lock_ex(&trans->hmp->sync_lock);
756 }
757 
758 void
759 hammer_sync_lock_sh(hammer_transaction_t trans)
760 {
761 	++trans->sync_lock_refs;
762 	hammer_lock_sh(&trans->hmp->sync_lock);
763 }
764 
765 int
766 hammer_sync_lock_sh_try(hammer_transaction_t trans)
767 {
768 	int error;
769 
770 	++trans->sync_lock_refs;
771 	if ((error = hammer_lock_sh_try(&trans->hmp->sync_lock)) != 0)
772 		--trans->sync_lock_refs;
773 	return (error);
774 }
775 
776 void
777 hammer_sync_unlock(hammer_transaction_t trans)
778 {
779 	--trans->sync_lock_refs;
780 	hammer_unlock(&trans->hmp->sync_lock);
781 }
782 
783 /*
784  * Misc
785  */
786 u_int32_t
787 hammer_to_unix_xid(uuid_t *uuid)
788 {
789 	return(*(u_int32_t *)&uuid->node[2]);
790 }
791 
792 void
793 hammer_guid_to_uuid(uuid_t *uuid, u_int32_t guid)
794 {
795 	bzero(uuid, sizeof(*uuid));
796 	*(u_int32_t *)&uuid->node[2] = guid;
797 }
798 
799 void
800 hammer_time_to_timespec(u_int64_t xtime, struct timespec *ts)
801 {
802 	ts->tv_sec = (unsigned long)(xtime / 1000000);
803 	ts->tv_nsec = (unsigned int)(xtime % 1000000) * 1000L;
804 }
805 
806 u_int64_t
807 hammer_timespec_to_time(struct timespec *ts)
808 {
809 	u_int64_t xtime;
810 
811 	xtime = (unsigned)(ts->tv_nsec / 1000) +
812 		(unsigned long)ts->tv_sec * 1000000ULL;
813 	return(xtime);
814 }
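
/*
 * Worked example (derived from the two conversions above): xtime is in
 * microseconds since the Unix epoch.  xtime = 1230768000000000
 * (2009-01-01 00:00:00 UTC) becomes tv_sec = 1230768000, tv_nsec = 0.
 * Going the other way, tv_nsec = 123456789 becomes 123456 microseconds,
 * so a round trip yields tv_nsec = 123456000; sub-microsecond precision
 * is discarded.
 */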
815 
816 
817 /*
818  * Convert a HAMMER filesystem object type to a vnode type
819  */
820 enum vtype
821 hammer_get_vnode_type(u_int8_t obj_type)
822 {
823 	switch(obj_type) {
824 	case HAMMER_OBJTYPE_DIRECTORY:
825 		return(VDIR);
826 	case HAMMER_OBJTYPE_REGFILE:
827 		return(VREG);
828 	case HAMMER_OBJTYPE_DBFILE:
829 		return(VDATABASE);
830 	case HAMMER_OBJTYPE_FIFO:
831 		return(VFIFO);
832 	case HAMMER_OBJTYPE_SOCKET:
833 		return(VSOCK);
834 	case HAMMER_OBJTYPE_CDEV:
835 		return(VCHR);
836 	case HAMMER_OBJTYPE_BDEV:
837 		return(VBLK);
838 	case HAMMER_OBJTYPE_SOFTLINK:
839 		return(VLNK);
840 	default:
841 		return(VBAD);
842 	}
843 	/* not reached */
844 }
845 
846 int
847 hammer_get_dtype(u_int8_t obj_type)
848 {
849 	switch(obj_type) {
850 	case HAMMER_OBJTYPE_DIRECTORY:
851 		return(DT_DIR);
852 	case HAMMER_OBJTYPE_REGFILE:
853 		return(DT_REG);
854 	case HAMMER_OBJTYPE_DBFILE:
855 		return(DT_DBF);
856 	case HAMMER_OBJTYPE_FIFO:
857 		return(DT_FIFO);
858 	case HAMMER_OBJTYPE_SOCKET:
859 		return(DT_SOCK);
860 	case HAMMER_OBJTYPE_CDEV:
861 		return(DT_CHR);
862 	case HAMMER_OBJTYPE_BDEV:
863 		return(DT_BLK);
864 	case HAMMER_OBJTYPE_SOFTLINK:
865 		return(DT_LNK);
866 	default:
867 		return(DT_UNKNOWN);
868 	}
869 	/* not reached */
870 }
871 
872 u_int8_t
873 hammer_get_obj_type(enum vtype vtype)
874 {
875 	switch(vtype) {
876 	case VDIR:
877 		return(HAMMER_OBJTYPE_DIRECTORY);
878 	case VREG:
879 		return(HAMMER_OBJTYPE_REGFILE);
880 	case VDATABASE:
881 		return(HAMMER_OBJTYPE_DBFILE);
882 	case VFIFO:
883 		return(HAMMER_OBJTYPE_FIFO);
884 	case VSOCK:
885 		return(HAMMER_OBJTYPE_SOCKET);
886 	case VCHR:
887 		return(HAMMER_OBJTYPE_CDEV);
888 	case VBLK:
889 		return(HAMMER_OBJTYPE_BDEV);
890 	case VLNK:
891 		return(HAMMER_OBJTYPE_SOFTLINK);
892 	default:
893 		return(HAMMER_OBJTYPE_UNKNOWN);
894 	}
895 	/* not reached */
896 }
897 
898 /*
899  * Return flags for hammer_delete_at_cursor()
900  */
901 int
902 hammer_nohistory(hammer_inode_t ip)
903 {
904 	if (ip->hmp->hflags & HMNT_NOHISTORY)
905 		return(HAMMER_DELETE_DESTROY);
906 	if (ip->ino_data.uflags & (SF_NOHISTORY|UF_NOHISTORY))
907 		return(HAMMER_DELETE_DESTROY);
908 	return(0);
909 }
910 
911 /*
912  * ALGORITHM VERSION 0:
913  *	Return a namekey hash.   The 64 bit namekey hash consists of a 32 bit
914  *	crc in the MSB and 0 in the LSB.  The caller will use the low 32 bits
915  *	to generate a unique key and will scan all entries with the same upper
916  *	32 bits when issuing a lookup.
917  *
918  *	0hhhhhhhhhhhhhhh hhhhhhhhhhhhhhhh 0000000000000000 0000000000000000
919  *
920  * ALGORITHM VERSION 1:
921  *
922  *	This algorithm breaks the filename down into separate 32-bit crcs,
923  *	one for each filename segment separated by a special character (dot,
924  *	dash, underscore, or tilde).  The CRCs are then added together.
925  *	This keeps temporary-name variants close together.  A full-filename
926  *	16 bit crc is also generated to deal with degenerate conditions.
927  *
928  *	The algorithm is designed to handle create/rename situations such
929  *	that a create with an extension followed by a rename without the
930  *	extension only shifts the key space rather than randomizing it.
931  *
932  *	NOTE: The inode allocator cache can only match 10 bits so we do
933  *	      not really have any room for a partial sorted name, and
934  *	      numbers don't sort well in that situation anyway.
935  *
936  *	0mmmmmmmmmmmmmmm mmmmmmmmmmmmmmmm llllllllllllllll 0000000000000000
937  *
938  *
939  * We strip bit 63 in order to provide a positive key; this way a seek
940  * offset of 0 will represent the base of the directory.
941  *
942  * We usually strip bit 0 (set it to 0) in order to provide a consistent
943  * iteration space for collisions.
944  *
945  * This function can never return 0.  We use the MSB-0 space to synthesize
946  * artificial directory entries such as "." and "..".
947  */
948 int64_t
949 hammer_directory_namekey(hammer_inode_t dip, const void *name, int len,
950 			 u_int32_t *max_iterationsp)
951 {
952 	const char *aname = name;
953 	int32_t crcx;
954 	int64_t key;
955 	int i;
956 	int j;
957 
958 	switch (dip->ino_data.cap_flags & HAMMER_INODE_CAP_DIRHASH_MASK) {
959 	case HAMMER_INODE_CAP_DIRHASH_ALG0:
960 		/*
961 		 * Original algorithm
962 		 */
963 		key = (int64_t)(crc32(aname, len) & 0x7FFFFFFF) << 32;
964 		if (key == 0)
965 			key |= 0x100000000LL;
966 		*max_iterationsp = 0xFFFFFFFFU;
967 		break;
968 	case HAMMER_INODE_CAP_DIRHASH_ALG1:
969 		/*
970 		 * Filesystem version 6 or better will create directories
971 		 * using the ALG1 dirhash.  This hash breaks the filename
972 		 * up into domains separated by special characters and
973 		 * hashes each domain independently.
974 		 *
975 		 * We also do a simple sub-sort using the first character
976 		 * of the filename in the top 5-bits.
977 		 */
978 		key = 0;
979 
980 		/*
981 		 * m32
982 		 */
983 		crcx = 0;
984 		for (i = j = 0; i < len; ++i) {
985 			if (aname[i] == '.' ||
986 			    aname[i] == '-' ||
987 			    aname[i] == '_' ||
988 			    aname[i] == '~') {
989 				if (i != j)
990 					crcx += crc32(aname + j, i - j);
991 				j = i + 1;
992 			}
993 		}
994 		if (i != j)
995 			crcx += crc32(aname + j, i - j);
996 
997 #if 0
998 		/*
999 		 * xor top 5 bits 0mmmm into low bits and steal the top 5
1000 		 * bits as a semi sub sort using the first character of
1001 		 * the filename.  bit 63 is always left as 0 so directory
1002 		 * keys are positive numbers.
1003 		 */
1004 		crcx ^= (uint32_t)crcx >> (32 - 5);
1005 		crcx = (crcx & 0x07FFFFFF) | ((aname[0] & 0x0F) << (32 - 5));
1006 #endif
1007 		crcx &= 0x7FFFFFFFU;
1008 
1009 		key |= (uint64_t)crcx << 32;
1010 
1011 		/*
1012 		 * l16 - crc of entire filename
1013 		 *
1014 		 * This crc reduces degenerate hash collision conditions
1015 		 */
1016 		crcx = crc32(aname, len);
1017 		crcx = crcx ^ (crcx << 16);
1018 		key |= crcx & 0xFFFF0000U;
1019 
1020 		/*
1021 		 * Cleanup
1022 		 */
1023 		if ((key & 0xFFFFFFFF00000000LL) == 0)
1024 			key |= 0x100000000LL;
1025 		if (hammer_debug_general & 0x0400) {
1026 			kprintf("namekey2: 0x%016llx %*.*s\n",
1027 				(long long)key, len, len, aname);
1028 		}
1029 		*max_iterationsp = 0x00FFFFFF;
1030 		break;
1031 	case HAMMER_INODE_CAP_DIRHASH_ALG2:
1032 	case HAMMER_INODE_CAP_DIRHASH_ALG3:
1033 	default:
1034 		key = 0;			/* compiler warning */
1035 		*max_iterationsp = 1;		/* sanity */
1036 		panic("hammer_directory_namekey: bad algorithm %p\n", dip);
1037 		break;
1038 	}
1039 	return(key);
1040 }
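
/*
 * Illustrative sketch (not part of the original source): the returned key
 * is only the base of a hash chain.  Because of collisions and the
 * create-time uniquification of the low bits, a lookup scans the key range
 * [key, key + *max_iterationsp] and matches on the actual filename.
 */
#if 0
static void
example_dirent_key_range(hammer_inode_t dip, const char *name, int len,
			 int64_t *key_begp, int64_t *key_endp)
{
	u_int32_t max_iterations;

	*key_begp = hammer_directory_namekey(dip, name, len, &max_iterations);
	*key_endp = *key_begp + max_iterations;
	/* scan B-Tree entries in this key range and compare names */
}
#endif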
1041 
1042 /*
1043  * Convert string after @@ (@@ not included) to TID.  Returns 0 on success,
1044  * EINVAL on failure.
1045  *
1046  * If this function fails *ispfs, *tidp, and *localizationp will not
1047  * be modified.
1048  */
1049 int
1050 hammer_str_to_tid(const char *str, int *ispfsp,
1051 		  hammer_tid_t *tidp, u_int32_t *localizationp)
1052 {
1053 	hammer_tid_t tid;
1054 	u_int32_t localization;
1055 	char *ptr;
1056 	int ispfs;
1057 	int n;
1058 
1059 	/*
1060 	 * Forms allowed for TID:  "0x%016llx"
1061 	 *			   "-1"
1062 	 */
1063 	tid = strtouq(str, &ptr, 0);
1064 	n = ptr - str;
1065 	if (n == 2 && str[0] == '-' && str[1] == '1') {
1066 		/* ok */
1067 	} else if (n == 18 && str[0] == '0' && (str[1] | 0x20) == 'x') {
1068 		/* ok */
1069 	} else {
1070 		return(EINVAL);
1071 	}
1072 
1073 	/*
1074 	 * Forms allowed for PFS:  ":%05d"  (i.e. "...:0" would be illegal).
1075 	 */
1076 	str = ptr;
1077 	if (*str == ':') {
1078 		localization = strtoul(str + 1, &ptr, 10) << 16;
1079 		if (ptr - str != 6)
1080 			return(EINVAL);
1081 		str = ptr;
1082 		ispfs = 1;
1083 	} else {
1084 		localization = *localizationp;
1085 		ispfs = 0;
1086 	}
1087 
1088 	/*
1089 	 * Any trailing junk invalidates special extension handling.
1090 	 */
1091 	if (*str)
1092 		return(EINVAL);
1093 	*tidp = tid;
1094 	*localizationp = localization;
1095 	*ispfsp = ispfs;
1096 	return(0);
1097 }
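
/*
 * Examples of the accepted forms (derived from the checks above; the TID
 * values are illustrative):
 *
 *	"-1"				strtouq() wraps this to the maximum
 *					64 bit TID
 *	"0x00000001061a8ba0"		explicit TID ("0x" + 16 hex digits);
 *					localization is left at the
 *					caller-supplied value
 *	"0x00000001061a8ba0:00001"	explicit TID plus a 5 digit PFS id,
 *					which is shifted into the upper 16
 *					bits of the localization
 */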
1098 
1099 void
1100 hammer_crc_set_blockmap(hammer_blockmap_t blockmap)
1101 {
1102 	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
1103 }
1104 
1105 void
1106 hammer_crc_set_volume(hammer_volume_ondisk_t ondisk)
1107 {
1108 	ondisk->vol_crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
1109 			  crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
1110 }
1111 
1112 int
1113 hammer_crc_test_blockmap(hammer_blockmap_t blockmap)
1114 {
1115 	hammer_crc_t crc;
1116 
1117 	crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
1118 	return (blockmap->entry_crc == crc);
1119 }
1120 
1121 int
1122 hammer_crc_test_volume(hammer_volume_ondisk_t ondisk)
1123 {
1124 	hammer_crc_t crc;
1125 
1126 	crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
1127 	      crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
1128 	return (ondisk->vol_crc == crc);
1129 }
1130 
1131 int
1132 hammer_crc_test_btree(hammer_node_ondisk_t ondisk)
1133 {
1134 	hammer_crc_t crc;
1135 
1136 	crc = crc32(&ondisk->crc + 1, HAMMER_BTREE_CRCSIZE);
1137 	return (ondisk->crc == crc);
1138 }
1139 
1140 /*
1141  * Test or set the leaf->data_crc field.  Deal with any special cases given
1142  * a generic B-Tree leaf element and its data.
1143  *
1144  * NOTE: Inode-data: the atime and mtime fields are not CRCd, allowing them
1145  *       to be updated in-place.
1146  */
1147 int
1148 hammer_crc_test_leaf(void *data, hammer_btree_leaf_elm_t leaf)
1149 {
1150 	hammer_crc_t crc;
1151 
1152 	if (leaf->data_len == 0) {
1153 		crc = 0;
1154 	} else {
1155 		switch(leaf->base.rec_type) {
1156 		case HAMMER_RECTYPE_INODE:
1157 			if (leaf->data_len != sizeof(struct hammer_inode_data))
1158 				return(0);
1159 			crc = crc32(data, HAMMER_INODE_CRCSIZE);
1160 			break;
1161 		default:
1162 			crc = crc32(data, leaf->data_len);
1163 			break;
1164 		}
1165 	}
1166 	return (leaf->data_crc == crc);
1167 }
1168 
1169 void
1170 hammer_crc_set_leaf(void *data, hammer_btree_leaf_elm_t leaf)
1171 {
1172 	if (leaf->data_len == 0) {
1173 		leaf->data_crc = 0;
1174 	} else {
1175 		switch(leaf->base.rec_type) {
1176 		case HAMMER_RECTYPE_INODE:
1177 			KKASSERT(leaf->data_len ==
1178 				  sizeof(struct hammer_inode_data));
1179 			leaf->data_crc = crc32(data, HAMMER_INODE_CRCSIZE);
1180 			break;
1181 		default:
1182 			leaf->data_crc = crc32(data, leaf->data_len);
1183 			break;
1184 		}
1185 	}
1186 }
1187 
1188 void
1189 hkprintf(const char *ctl, ...)
1190 {
1191 	__va_list va;
1192 
1193 	if (hammer_debug_debug) {
1194 		__va_start(va, ctl);
1195 		kvprintf(ctl, va);
1196 		__va_end(va);
1197 	}
1198 }
1199 
1200 /*
1201  * Return the block size at the specified file offset.
1202  */
1203 int
1204 hammer_blocksize(int64_t file_offset)
1205 {
1206 	if (file_offset < HAMMER_XDEMARC)
1207 		return(HAMMER_BUFSIZE);
1208 	else
1209 		return(HAMMER_XBUFSIZE);
1210 }
1211 
1212 int
1213 hammer_blockoff(int64_t file_offset)
1214 {
1215 	if (file_offset < HAMMER_XDEMARC)
1216 		return((int)file_offset & HAMMER_BUFMASK);
1217 	else
1218 		return((int)file_offset & HAMMER_XBUFMASK);
1219 }
1220 
1221 /*
1222  * Return the demarcation point between the two offsets where
1223  * the block size changes.
1224  */
1225 int64_t
1226 hammer_blockdemarc(int64_t file_offset1, int64_t file_offset2)
1227 {
1228 	if (file_offset1 < HAMMER_XDEMARC) {
1229 		if (file_offset2 <= HAMMER_XDEMARC)
1230 			return(file_offset2);
1231 		return(HAMMER_XDEMARC);
1232 	}
1233 	panic("hammer_blockdemarc: illegal range %lld %lld\n",
1234 	      (long long)file_offset1, (long long)file_offset2);
1235 }
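
/*
 * Note (derived from the functions above): hammer_blockdemarc() lets an
 * I/O loop clamp a range so a single pass never mixes the two buffer
 * sizes.  For a range that starts below HAMMER_XDEMARC and ends above it,
 * the first pass is cut off exactly at HAMMER_XDEMARC and the next pass
 * resumes there using the large HAMMER_XBUFSIZE blocks.
 */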
1236 
1237 udev_t
1238 hammer_fsid_to_udev(uuid_t *uuid)
1239 {
1240 	u_int32_t crc;
1241 
1242 	crc = crc32(uuid, sizeof(*uuid));
1243 	return((udev_t)crc);
1244 }
1245 
1246