xref: /dragonfly/sys/vfs/hammer/hammer_subs.c (revision d4ef6694)
1 /*
2  * Copyright (c) 2007-2011 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 /*
35  * HAMMER structural locking
36  */
37 
38 #include "hammer.h"
39 #include <sys/dirent.h>
40 
41 void
42 hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident)
43 {
44 	thread_t td = curthread;
45 	u_int lv;
46 	u_int nlv;
47 
48 	KKASSERT(lock->refs);
49 	for (;;) {
50 		lv = lock->lockval;
51 
52 		if (lv == 0) {
53 			nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
54 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
55 				lock->lowner = td;
56 				break;
57 			}
58 		} else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
59 			   lock->lowner == td) {
60 			nlv = (lv + 1);
61 			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
62 				break;
63 		} else {
64 			if (hammer_debug_locks) {
65 				kprintf("hammer_lock_ex: held by %p\n",
66 					lock->lowner);
67 			}
68 			nlv = lv | HAMMER_LOCKF_WANTED;
69 			++hammer_contention_count;
70 			tsleep_interlock(&lock->lockval, 0);
71 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
72 				tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
73 				if (hammer_debug_locks)
74 					kprintf("hammer_lock_ex: try again\n");
75 			}
76 		}
77 	}
78 }
79 
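/*
 * Illustrative sketch (not part of the original file): the expected call
 * pattern for the exclusive lock above.  The lock must already carry a
 * structural reference (lock->refs != 0), exclusive holds are recursive
 * for the owning thread, and every acquisition pairs with hammer_unlock().
 * The function name and ident string are hypothetical.
 */
#if 0
static void
example_exclusive_hold(struct hammer_lock *lock)
{
	hammer_ref(lock);			/* structural ref keeps the lock valid */
	hammer_lock_ex_ident(lock, "exmpl");	/* blocks until exclusively held */
	/* ... modify the structure guarded by the lock ... */
	hammer_unlock(lock);
	hammer_rel(lock);
}
#endif
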
80 /*
81  * Try to obtain an exclusive lock
82  */
83 int
84 hammer_lock_ex_try(struct hammer_lock *lock)
85 {
86 	thread_t td = curthread;
87 	int error;
88 	u_int lv;
89 	u_int nlv;
90 
91 	KKASSERT(lock->refs);
92 	for (;;) {
93 		lv = lock->lockval;
94 
95 		if (lv == 0) {
96 			nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
97 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
98 				lock->lowner = td;
99 				error = 0;
100 				break;
101 			}
102 		} else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
103 			   lock->lowner == td) {
104 			nlv = (lv + 1);
105 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
106 				error = 0;
107 				break;
108 			}
109 		} else {
110 			error = EAGAIN;
111 			break;
112 		}
113 	}
114 	return (error);
115 }
116 
117 /*
118  * Obtain a shared lock
119  *
120  * We do not give pending exclusive locks priority over shared locks as
121  * doing so could lead to a deadlock.
122  */
123 void
124 hammer_lock_sh(struct hammer_lock *lock)
125 {
126 	thread_t td = curthread;
127 	u_int lv;
128 	u_int nlv;
129 	const char *ident = "hmrlck";
130 
131 	KKASSERT(lock->refs);
132 	for (;;) {
133 		lv = lock->lockval;
134 
135 		if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
136 			nlv = (lv + 1);
137 			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
138 				break;
139 		} else if (lock->lowner == td) {
140 			/*
141 			 * Disallowed case: drop into the kernel debugger for
142 			 * now.  A 'cont' continues with an exclusive lock.
143 			 */
144 			nlv = (lv + 1);
145 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
146 				if (hammer_debug_critical)
147 					Debugger("hammer_lock_sh: holding ex");
148 				break;
149 			}
150 		} else {
151 			nlv = lv | HAMMER_LOCKF_WANTED;
152 			++hammer_contention_count;
153 			tsleep_interlock(&lock->lockval, 0);
154 			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
155 				tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
156 		}
157 	}
158 }
159 
160 int
161 hammer_lock_sh_try(struct hammer_lock *lock)
162 {
163 	thread_t td = curthread;
164 	u_int lv;
165 	u_int nlv;
166 	int error;
167 
168 	KKASSERT(lock->refs);
169 	for (;;) {
170 		lv = lock->lockval;
171 
172 		if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
173 			nlv = (lv + 1);
174 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
175 				error = 0;
176 				break;
177 			}
178 		} else if (lock->lowner == td) {
179 			/*
180 			 * Disallowed case: drop into the kernel debugger for
181 			 * now.  A 'cont' continues with an exclusive lock.
182 			 */
183 			nlv = (lv + 1);
184 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
185 				if (hammer_debug_critical)
186 					Debugger("hammer_lock_sh: holding ex");
187 				error = 0;
188 				break;
189 			}
190 		} else {
191 			error = EAGAIN;
192 			break;
193 		}
194 	}
195 	return (error);
196 }
197 
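/*
 * Illustrative sketch (not part of the original file): a typical
 * non-blocking acquisition using the try variants above.  EAGAIN simply
 * means the caller should back off and retry or take another path.  The
 * function name is hypothetical.
 */
#if 0
static int
example_try_shared(struct hammer_lock *lock)
{
	int error;

	error = hammer_lock_sh_try(lock);
	if (error == 0) {
		/* ... read the structure guarded by the lock ... */
		hammer_unlock(lock);
	}
	return(error);		/* 0 or EAGAIN */
}
#endif
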
198 /*
199  * Upgrade a shared lock to an exclusively held lock.  This function will
200  * return EDEADLK if there is more than one shared holder.
201  *
202  * No error occurs and no action is taken if the lock is already exclusively
203  * held by the caller.  If the lock is not held at all or held exclusively
204  * by someone else, this function will panic.
205  */
206 int
207 hammer_lock_upgrade(struct hammer_lock *lock, int shcount)
208 {
209 	thread_t td = curthread;
210 	u_int lv;
211 	u_int nlv;
212 	int error;
213 
214 	for (;;) {
215 		lv = lock->lockval;
216 
217 		if ((lv & ~HAMMER_LOCKF_WANTED) == shcount) {
218 			nlv = lv | HAMMER_LOCKF_EXCLUSIVE;
219 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
220 				lock->lowner = td;
221 				error = 0;
222 				break;
223 			}
224 		} else if (lv & HAMMER_LOCKF_EXCLUSIVE) {
225 			if (lock->lowner != curthread)
226 				panic("hammer_lock_upgrade: illegal state");
227 			error = 0;
228 			break;
229 		} else if ((lv & ~HAMMER_LOCKF_WANTED) == 0) {
230 			panic("hammer_lock_upgrade: lock is not held");
231 			/* NOT REACHED */
232 			error = EDEADLK;
233 			break;
234 		} else {
235 			error = EDEADLK;
236 			break;
237 		}
238 	}
239 	return (error);
240 }
241 
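/*
 * Illustrative sketch (not part of the original file): the usual way an
 * upgrade failure is handled.  If hammer_lock_upgrade() returns EDEADLK
 * the caller drops its shared hold and re-acquires the lock exclusively,
 * re-validating any state afterwards.  The function name is hypothetical
 * and shcount of 1 assumes the caller holds a single shared reference.
 */
#if 0
static void
example_upgrade(struct hammer_lock *lock)
{
	if (hammer_lock_upgrade(lock, 1) == EDEADLK) {
		hammer_unlock(lock);
		hammer_lock_ex(lock);
		/* ... re-validate state lost while the lock was dropped ... */
	}
	/* ... exclusive work, then drop back to a shared hold ... */
	hammer_lock_downgrade(lock, 1);
}
#endif
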
242 /*
243  * Downgrade an exclusively held lock to a shared lock.
244  */
245 void
246 hammer_lock_downgrade(struct hammer_lock *lock, int shcount)
247 {
248 	thread_t td __debugvar = curthread;
249 	u_int lv;
250 	u_int nlv;
251 
252 	KKASSERT((lock->lockval & ~HAMMER_LOCKF_WANTED) ==
253 		 (HAMMER_LOCKF_EXCLUSIVE | shcount));
254 	KKASSERT(lock->lowner == td);
255 
256 	/*
257 	 * NOTE: Must clear owner before releasing exclusivity
258 	 */
259 	lock->lowner = NULL;
260 
261 	for (;;) {
262 		lv = lock->lockval;
263 		nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
264 		if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
265 			if (lv & HAMMER_LOCKF_WANTED)
266 				wakeup(&lock->lockval);
267 			break;
268 		}
269 	}
270 }
271 
272 void
273 hammer_unlock(struct hammer_lock *lock)
274 {
275 	thread_t td __debugvar = curthread;
276 	u_int lv;
277 	u_int nlv;
278 
279 	lv = lock->lockval;
280 	KKASSERT(lv != 0);
281 	if (lv & HAMMER_LOCKF_EXCLUSIVE)
282 		KKASSERT(lock->lowner == td);
283 
284 	for (;;) {
285 		lv = lock->lockval;
286 		nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
287 		if (nlv > 1) {
288 			nlv = lv - 1;
289 			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
290 				break;
291 		} else if (nlv == 1) {
292 			nlv = 0;
293 			if (lv & HAMMER_LOCKF_EXCLUSIVE)
294 				lock->lowner = NULL;
295 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
296 				if (lv & HAMMER_LOCKF_WANTED)
297 					wakeup(&lock->lockval);
298 				break;
299 			}
300 		} else {
301 			panic("hammer_unlock: lock %p is not held", lock);
302 		}
303 	}
304 }
305 
306 /*
307  * The calling thread must be holding a shared or exclusive lock.
308  * Returns < 0 if the lock is held shared, and > 0 if held exclusively.
309  */
310 int
311 hammer_lock_status(struct hammer_lock *lock)
312 {
313 	u_int lv = lock->lockval;
314 
315 	if (lv & HAMMER_LOCKF_EXCLUSIVE)
316 		return(1);
317 	else if (lv)
318 		return(-1);
319 	panic("hammer_lock_status: lock must be held: %p", lock);
320 }
321 
322 /*
323  * Bump the ref count for a lock (not the excl/share count, but a separate
324  * structural reference count).  The CHECK flag will be set on a 0->1
325  * transition.
326  *
327  * This function does nothing to serialize races between multiple threads.
328  * The caller can interlock it later on to deal with serialization.
329  *
330  * MPSAFE
331  */
332 void
333 hammer_ref(struct hammer_lock *lock)
334 {
335 	u_int lv;
336 	u_int nlv;
337 
338 	for (;;) {
339 		lv = lock->refs;
340 		if ((lv & ~HAMMER_REFS_FLAGS) == 0) {
341 			nlv = (lv + 1) | HAMMER_REFS_CHECK;
342 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
343 				return;
344 		} else {
345 			nlv = (lv + 1);
346 			KKASSERT((int)nlv > 0);
347 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
348 				return;
349 		}
350 	}
351 	/* not reached */
352 }
353 
354 /*
355  * Drop the ref count for a lock (not the excl/share count, but a separate
356  * structural reference count).  The CHECK flag will be cleared on a 1->0
357  * transition.
358  *
359  * This function does nothing to serialize races between multiple threads.
360  *
361  * MPSAFE
362  */
363 void
364 hammer_rel(struct hammer_lock *lock)
365 {
366 	u_int lv;
367 	u_int nlv;
368 
369 	for (;;) {
370 		lv = lock->refs;
371 		if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
372 			nlv = (lv - 1) & ~HAMMER_REFS_CHECK;
373 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
374 				return;
375 		} else {
376 			KKASSERT((int)lv > 0);
377 			nlv = (lv - 1);
378 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
379 				return;
380 		}
381 	}
382 	/* not reached */
383 }
384 
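/*
 * Illustrative sketch (not part of the original file): structural
 * references vs. the shared/exclusive lock count.  A structure is kept
 * alive with hammer_ref()/hammer_rel() and only locked around the
 * sections that need stability.  The function name is hypothetical.
 */
#if 0
static void
example_ref_and_lock(struct hammer_lock *lock)
{
	hammer_ref(lock);		/* may set CHECK on a 0->1 transition */
	hammer_lock_sh(lock);
	/* ... read fields guarded by the lock ... */
	hammer_unlock(lock);
	hammer_rel(lock);		/* clears CHECK on a 1->0 transition */
}
#endif
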
385 /*
386  * The hammer_*_interlock() and hammer_*_interlock_done() functions are
387  * more sophisticated versions which handle MP transition races and block
388  * when necessary.
389  *
390  * hammer_ref_interlock() bumps the ref-count and conditionally acquires
391  * the interlock for 0->1 transitions or if the CHECK is found to be set.
392  *
393  * This case will return TRUE, the interlock will be held, and the CHECK
394  * bit also set.  Other threads attempting to ref will see the CHECK bit
395  * and block until we clean up.
396  *
397  * FALSE is returned for transitions other than 0->1 when the CHECK bit
398  * is not found to be set, or if the function loses the race with another
399  * thread.
400  *
401  * TRUE is only returned to one thread and the others will block.
402  * Effectively a TRUE indicator means 'someone transitioned 0->1
403  * and you are the first guy to successfully lock it after that, so you
404  * need to check'.  Due to races the ref-count may be greater than 1 upon
405  * return.
406  *
407  * MPSAFE
408  */
409 int
410 hammer_ref_interlock(struct hammer_lock *lock)
411 {
412 	u_int lv;
413 	u_int nlv;
414 
415 	/*
416 	 * Integrated reference count bump, lock, and check, with hot-path.
417 	 *
418 	 * (a) Return 1	(+LOCKED, +CHECK)	0->1 transition
419 	 * (b) Return 0 (-LOCKED, -CHECK)	N->N+1 transition
420 	 * (c) Break out (+CHECK)		Check condition and cannot lock
421 	 * (d) Return 1 (+LOCKED, +CHECK)	Successfully locked
422 	 */
423 	for (;;) {
424 		lv = lock->refs;
425 		if (lv == 0) {
426 			nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
427 			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
428 				lock->rowner = curthread;
429 				return(1);
430 			}
431 		} else {
432 			nlv = (lv + 1);
433 			if ((lv & ~HAMMER_REFS_FLAGS) == 0)
434 				nlv |= HAMMER_REFS_CHECK;
435 			if ((nlv & HAMMER_REFS_CHECK) == 0) {
436 				if (atomic_cmpset_int(&lock->refs, lv, nlv))
437 					return(0);
438 			} else if (lv & HAMMER_REFS_LOCKED) {
439 				/* CHECK also set here */
440 				if (atomic_cmpset_int(&lock->refs, lv, nlv))
441 					break;
442 			} else {
443 				/* CHECK also set here */
444 				nlv |= HAMMER_REFS_LOCKED;
445 				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
446 					lock->rowner = curthread;
447 					return(1);
448 				}
449 			}
450 		}
451 	}
452 
453 	/*
454 	 * Deferred check condition because we were unable to acquire the
455 	 * lock.  We must block until the check condition is cleared due
456 	 * to a race with another thread, or we are able to acquire the
457 	 * lock.
458 	 *
459 	 * (a) Return 0	(-CHECK)		Another thread handled it
460 	 * (b) Return 1 (+LOCKED, +CHECK)	We handled it.
461 	 */
462 	for (;;) {
463 		lv = lock->refs;
464 		if ((lv & HAMMER_REFS_CHECK) == 0)
465 			return(0);
466 		if (lv & HAMMER_REFS_LOCKED) {
467 			tsleep_interlock(&lock->refs, 0);
468 			nlv = (lv | HAMMER_REFS_WANTED);
469 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
470 				tsleep(&lock->refs, PINTERLOCKED, "h1lk", 0);
471 		} else {
472 			/* CHECK also set here */
473 			nlv = lv | HAMMER_REFS_LOCKED;
474 			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
475 				lock->rowner = curthread;
476 				return(1);
477 			}
478 		}
479 	}
480 	/* not reached */
481 }
482 
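/*
 * Illustrative sketch (not part of the original file): the load-path
 * pattern the interlock functions above are designed for, loosely
 * modeled on how HAMMER validates in-memory structures.  The function
 * name and the load step are hypothetical.
 */
#if 0
static void
example_load(struct hammer_lock *lock)
{
	if (hammer_ref_interlock(lock)) {
		/*
		 * We won the 0->1 (or CHECK) race: validate or load the
		 * structure while competing refs block on the interlock.
		 */
		/* ... load / initialize ... */
		hammer_ref_interlock_done(lock);	/* success path */
	} else {
		/* Already valid; we simply hold one more ref. */
	}
}
#endif
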
483 /*
484  * This is the same as hammer_ref_interlock() but asserts that the
485  * 0->1 transition is always true, thus the lock must have no references
486  * on entry or have CHECK set, and will have one reference with the
487  * interlock held on return.  It must also not be interlocked on entry
488  * by anyone.
489  *
490  * NOTE that CHECK will never be found set when the ref-count is 0.
491  *
492  * TRUE is always returned to match the API for hammer_ref_interlock().
493  * This function returns with one ref, the lock held, and the CHECK bit set.
494  */
495 int
496 hammer_ref_interlock_true(struct hammer_lock *lock)
497 {
498 	u_int lv;
499 	u_int nlv;
500 
501 	for (;;) {
502 		lv = lock->refs;
503 
504 		if (lv) {
505 			panic("hammer_ref_interlock_true: bad lock %p %08x",
506 			      lock, lock->refs);
507 		}
508 		nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
509 		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
510 			lock->rowner = curthread;
511 			return (1);
512 		}
513 	}
514 }
515 
516 /*
517  * Unlock the interlock acquired by hammer_ref_interlock() and clear the
518  * CHECK flag.  The ref-count remains unchanged.
519  *
520  * This routine is called in the load path when the load succeeds.
521  */
522 void
523 hammer_ref_interlock_done(struct hammer_lock *lock)
524 {
525 	u_int lv;
526 	u_int nlv;
527 
528 	for (;;) {
529 		lv = lock->refs;
530 		nlv = lv & ~HAMMER_REFS_FLAGS;
531 		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
532 			if (lv & HAMMER_REFS_WANTED)
533 				wakeup(&lock->refs);
534 			break;
535 		}
536 	}
537 }
538 
539 /*
540  * hammer_rel_interlock() works a bit differently in that it must
541  * acquire the lock in tandem with a 1->0 transition.  CHECK is
542  * not used.
543  *
544  * TRUE is returned on 1->0 transitions with the lock held on return
545  * and FALSE is returned otherwise with the lock not held.
546  *
547  * It is important to note that the refs are not stable and may
548  * increase while we hold the lock; the TRUE indication only means
549  * that we transitioned 1->0, not necessarily that we stayed at 0.
550  *
551  * Another thread bumping refs while we hold the lock will set CHECK,
552  * causing one of the competing hammer_ref_interlock() calls to
553  * return TRUE after we release our lock.
554  *
555  * MPSAFE
556  */
557 int
558 hammer_rel_interlock(struct hammer_lock *lock, int locked)
559 {
560 	u_int lv;
561 	u_int nlv;
562 
563 	/*
564 	 * In locked mode (failure/unload path) we release the
565 	 * ref-count but leave it locked.
566 	 */
567 	if (locked) {
568 		hammer_rel(lock);
569 		return(1);
570 	}
571 
572 	/*
573 	 * Integrated reference count drop with LOCKED, plus the hot-path
574 	 * returns.
575 	 */
576 	for (;;) {
577 		lv = lock->refs;
578 
579 		if (lv == 1) {
580 			nlv = 0 | HAMMER_REFS_LOCKED;
581 			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
582 				lock->rowner = curthread;
583 				return(1);
584 			}
585 		} else if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
586 			if ((lv & HAMMER_REFS_LOCKED) == 0) {
587 				nlv = (lv - 1) | HAMMER_REFS_LOCKED;
588 				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
589 					lock->rowner = curthread;
590 					return(1);
591 				}
592 			} else {
593 				nlv = lv | HAMMER_REFS_WANTED;
594 				tsleep_interlock(&lock->refs, 0);
595 				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
596 					tsleep(&lock->refs, PINTERLOCKED,
597 					       "h0lk", 0);
598 				}
599 			}
600 		} else {
601 			nlv = (lv - 1);
602 			KKASSERT((int)nlv >= 0);
603 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
604 				return(0);
605 		}
606 	}
607 	/* not reached */
608 }
609 
610 /*
611  * Unlock the interlock acquired by hammer_rel_interlock().
612  *
613  * If orig_locked is non-zero the interlock was originally held prior to
614  * the hammer_rel_interlock() call and passed through to us.  In this
615  * case we want to retain the CHECK error state if not transitioning
616  * to 0.
617  *
618  * The code is the same either way so we do not have to conditionalize
619  * on orig_locked.
620  */
621 void
622 hammer_rel_interlock_done(struct hammer_lock *lock, int orig_locked __unused)
623 {
624 	u_int lv;
625 	u_int nlv;
626 
627 	for (;;) {
628 		lv = lock->refs;
629 		nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);
630 		if ((lv & ~HAMMER_REFS_FLAGS) == 0)
631 			nlv &= ~HAMMER_REFS_CHECK;
632 		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
633 			if (lv & HAMMER_REFS_WANTED)
634 				wakeup(&lock->refs);
635 			break;
636 		}
637 	}
638 }
639 
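/*
 * Illustrative sketch (not part of the original file): tearing a
 * structure down on the final reference using the 1->0 interlock above.
 * If another thread bumps refs while we hold the interlock it sets
 * CHECK and revalidates after we release.  The function name and the
 * destroy step are hypothetical.
 */
#if 0
static void
example_teardown(struct hammer_lock *lock)
{
	if (hammer_rel_interlock(lock, 0)) {
		/* We transitioned 1->0 and hold the interlock. */
		if ((lock->refs & ~HAMMER_REFS_FLAGS) == 0) {
			/* ... disassociate / free related resources ... */
		}
		hammer_rel_interlock_done(lock, 0);
	}
}
#endif
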
640 /*
641  * Acquire the interlock on lock->refs.
642  *
643  * Return TRUE if CHECK is currently set.  Note that CHECK will not
644  * be set if the reference count is 0, but can get set if this function
645  * is preceded by, say, hammer_ref(), or through races with other
646  * threads.  The return value allows the caller to use the same logic
647  * as hammer_ref_interlock().
648  *
649  * MPSAFE
650  */
651 int
652 hammer_get_interlock(struct hammer_lock *lock)
653 {
654 	u_int lv;
655 	u_int nlv;
656 
657 	for (;;) {
658 		lv = lock->refs;
659 		if (lv & HAMMER_REFS_LOCKED) {
660 			nlv = lv | HAMMER_REFS_WANTED;
661 			tsleep_interlock(&lock->refs, 0);
662 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
663 				tsleep(&lock->refs, PINTERLOCKED, "hilk", 0);
664 		} else {
665 			nlv = (lv | HAMMER_REFS_LOCKED);
666 			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
667 				lock->rowner = curthread;
668 				return((lv & HAMMER_REFS_CHECK) ? 1 : 0);
669 			}
670 		}
671 	}
672 }
673 
674 /*
675  * Attempt to acquire the interlock and expect 0 refs.  Used by the buffer
676  * cache callback code to disassociate or lock the bufs related to HAMMER
677  * structures.
678  *
679  * During teardown the related bp will be acquired by hammer_io_release()
680  * which interlocks our test.
681  *
682  * Returns non-zero on success, zero on failure.
683  */
684 int
685 hammer_try_interlock_norefs(struct hammer_lock *lock)
686 {
687 	u_int lv;
688 	u_int nlv;
689 
690 	for (;;) {
691 		lv = lock->refs;
692 		if (lv == 0) {
693 			nlv = lv | HAMMER_REFS_LOCKED;
694 			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
695 				lock->rowner = curthread;
696 				return(1);
697 			}
698 		} else {
699 			return(0);
700 		}
701 	}
702 	/* not reached */
703 }
704 
705 /*
706  * Release the interlock on lock->refs.  This function will set
707  * CHECK if the refs is non-zero and error is non-zero, and clear
708  * CHECK if the ref count is non-zero and error is non-zero, and clear
709  *
710  * MPSAFE
711  */
712 void
713 hammer_put_interlock(struct hammer_lock *lock, int error)
714 {
715 	u_int lv;
716 	u_int nlv;
717 
718 	for (;;) {
719 		lv = lock->refs;
720 		KKASSERT(lv & HAMMER_REFS_LOCKED);
721 		nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);
722 
723 		if ((nlv & ~HAMMER_REFS_FLAGS) == 0 || error == 0)
724 			nlv &= ~HAMMER_REFS_CHECK;
725 		else
726 			nlv |= HAMMER_REFS_CHECK;
727 
728 		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
729 			if (lv & HAMMER_REFS_WANTED)
730 				wakeup(&lock->refs);
731 			return;
732 		}
733 	}
734 }
735 
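/*
 * Illustrative sketch (not part of the original file): an explicit
 * interlock around a validity check, matching the return-value
 * convention of hammer_ref_interlock().  The function name and the
 * source of the error are hypothetical.
 */
#if 0
static void
example_check(struct hammer_lock *lock)
{
	int error = 0;

	if (hammer_get_interlock(lock)) {
		/* CHECK was set: revalidate the guarded state here */
		/* error = ...; */
	}
	/* a non-zero error leaves CHECK set if refs remain */
	hammer_put_interlock(lock, error);
}
#endif
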
736 /*
737  * The sync_lock must be held when doing any modifying operations on
738  * meta-data.  It does not have to be held when modifying non-meta-data buffers
739  * (backend or frontend).
740  *
741  * The flusher holds the lock exclusively while all other consumers hold it
742  * shared.  All modifying operations made while holding the lock are atomic
743  * in that they will be made part of the same flush group.
744  *
745  * Due to the atomicity requirement, deadlock recovery code CANNOT release the
746  * sync lock, nor can we give pending exclusive sync locks priority over
747  * a shared sync lock as this could lead to a 3-way deadlock.
748  */
749 void
750 hammer_sync_lock_ex(hammer_transaction_t trans)
751 {
752 	++trans->sync_lock_refs;
753 	hammer_lock_ex(&trans->hmp->sync_lock);
754 }
755 
756 void
757 hammer_sync_lock_sh(hammer_transaction_t trans)
758 {
759 	++trans->sync_lock_refs;
760 	hammer_lock_sh(&trans->hmp->sync_lock);
761 }
762 
763 int
764 hammer_sync_lock_sh_try(hammer_transaction_t trans)
765 {
766 	int error;
767 
768 	++trans->sync_lock_refs;
769 	if ((error = hammer_lock_sh_try(&trans->hmp->sync_lock)) != 0)
770 		--trans->sync_lock_refs;
771 	return (error);
772 }
773 
774 void
775 hammer_sync_unlock(hammer_transaction_t trans)
776 {
777 	--trans->sync_lock_refs;
778 	hammer_unlock(&trans->hmp->sync_lock);
779 }
780 
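/*
 * Illustrative sketch (not part of the original file): frontend code
 * modifying meta-data wraps the modification in a shared sync lock so it
 * becomes part of the flusher's current flush group.  "trans" is a
 * hypothetical transaction already set up by the caller.
 */
#if 0
	hammer_sync_lock_sh(trans);
	/* ... modifying operations on meta-data buffers ... */
	hammer_sync_unlock(trans);
#endif
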
781 /*
782  * Misc
783  */
784 u_int32_t
785 hammer_to_unix_xid(uuid_t *uuid)
786 {
787 	return(*(u_int32_t *)&uuid->node[2]);
788 }
789 
790 void
791 hammer_guid_to_uuid(uuid_t *uuid, u_int32_t guid)
792 {
793 	bzero(uuid, sizeof(*uuid));
794 	*(u_int32_t *)&uuid->node[2] = guid;
795 }
796 
797 void
798 hammer_time_to_timespec(u_int64_t xtime, struct timespec *ts)
799 {
800 	ts->tv_sec = (unsigned long)(xtime / 1000000);
801 	ts->tv_nsec = (unsigned int)(xtime % 1000000) * 1000L;
802 }
803 
804 u_int64_t
805 hammer_timespec_to_time(struct timespec *ts)
806 {
807 	u_int64_t xtime;
808 
809 	xtime = (unsigned)(ts->tv_nsec / 1000) +
810 		(unsigned long)ts->tv_sec * 1000000ULL;
811 	return(xtime);
812 }
813 
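/*
 * Illustrative worked example (not part of the original file): HAMMER
 * stores times as microseconds in a 64 bit field, so the conversions
 * above are a simple split/merge.  For example:
 *
 *	xtime = 1234567890123456 (us)
 *	hammer_time_to_timespec() -> tv_sec = 1234567890, tv_nsec = 123456000
 *	hammer_timespec_to_time() of that timespec returns 1234567890123456
 */
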
814 
815 /*
816  * Convert a HAMMER filesystem object type to a vnode type
817  */
818 enum vtype
819 hammer_get_vnode_type(u_int8_t obj_type)
820 {
821 	switch(obj_type) {
822 	case HAMMER_OBJTYPE_DIRECTORY:
823 		return(VDIR);
824 	case HAMMER_OBJTYPE_REGFILE:
825 		return(VREG);
826 	case HAMMER_OBJTYPE_DBFILE:
827 		return(VDATABASE);
828 	case HAMMER_OBJTYPE_FIFO:
829 		return(VFIFO);
830 	case HAMMER_OBJTYPE_SOCKET:
831 		return(VSOCK);
832 	case HAMMER_OBJTYPE_CDEV:
833 		return(VCHR);
834 	case HAMMER_OBJTYPE_BDEV:
835 		return(VBLK);
836 	case HAMMER_OBJTYPE_SOFTLINK:
837 		return(VLNK);
838 	default:
839 		return(VBAD);
840 	}
841 	/* not reached */
842 }
843 
844 int
845 hammer_get_dtype(u_int8_t obj_type)
846 {
847 	switch(obj_type) {
848 	case HAMMER_OBJTYPE_DIRECTORY:
849 		return(DT_DIR);
850 	case HAMMER_OBJTYPE_REGFILE:
851 		return(DT_REG);
852 	case HAMMER_OBJTYPE_DBFILE:
853 		return(DT_DBF);
854 	case HAMMER_OBJTYPE_FIFO:
855 		return(DT_FIFO);
856 	case HAMMER_OBJTYPE_SOCKET:
857 		return(DT_SOCK);
858 	case HAMMER_OBJTYPE_CDEV:
859 		return(DT_CHR);
860 	case HAMMER_OBJTYPE_BDEV:
861 		return(DT_BLK);
862 	case HAMMER_OBJTYPE_SOFTLINK:
863 		return(DT_LNK);
864 	default:
865 		return(DT_UNKNOWN);
866 	}
867 	/* not reached */
868 }
869 
870 u_int8_t
871 hammer_get_obj_type(enum vtype vtype)
872 {
873 	switch(vtype) {
874 	case VDIR:
875 		return(HAMMER_OBJTYPE_DIRECTORY);
876 	case VREG:
877 		return(HAMMER_OBJTYPE_REGFILE);
878 	case VDATABASE:
879 		return(HAMMER_OBJTYPE_DBFILE);
880 	case VFIFO:
881 		return(HAMMER_OBJTYPE_FIFO);
882 	case VSOCK:
883 		return(HAMMER_OBJTYPE_SOCKET);
884 	case VCHR:
885 		return(HAMMER_OBJTYPE_CDEV);
886 	case VBLK:
887 		return(HAMMER_OBJTYPE_BDEV);
888 	case VLNK:
889 		return(HAMMER_OBJTYPE_SOFTLINK);
890 	default:
891 		return(HAMMER_OBJTYPE_UNKNOWN);
892 	}
893 	/* not reached */
894 }
895 
896 /*
897  * Return flags for hammer_delete_at_cursor()
898  */
899 int
900 hammer_nohistory(hammer_inode_t ip)
901 {
902 	if (ip->hmp->hflags & HMNT_NOHISTORY)
903 		return(HAMMER_DELETE_DESTROY);
904 	if (ip->ino_data.uflags & (SF_NOHISTORY|UF_NOHISTORY))
905 		return(HAMMER_DELETE_DESTROY);
906 	return(0);
907 }
908 
909 /*
910  * ALGORITHM VERSION 0:
911  *	Return a namekey hash.   The 64 bit namekey hash consists of a 32 bit
912  *	crc in the MSB and 0 in the LSB.  The caller will use the low 32 bits
913  *	to generate a unique key and will scan all entries with the same upper
914  *	32 bits when issuing a lookup.
915  *
916  *	0hhhhhhhhhhhhhhh hhhhhhhhhhhhhhhh 0000000000000000 0000000000000000
917  *
918  * ALGORITHM VERSION 1:
919  *
920  *	This algorithm breaks the filename down into a separate 32-bit crcs
921  *	This algorithm breaks the filename down into separate 32-bit crcs,
922  *	one for each filename segment separated by a special character (dot,
923  *	dash, underscore, or tilde).  The CRCs are then added together.
924  *	generated to deal with degenerate conditions.
925  *
926  *	The algorithm is designed to handle create/rename situations such
927  *	that a create with an extension followed by a rename without the
928  *	extension only shifts the key space rather than randomizing it.
929  *
930  *	NOTE: The inode allocator cache can only match 10 bits so we do
931  *	      not really have any room for a partially sorted name, and
932  *	      numbers don't sort well in that situation anyway.
933  *
934  *	0mmmmmmmmmmmmmmm mmmmmmmmmmmmmmmm llllllllllllllll 0000000000000000
935  *
936  *
937  * We strip bit 63 in order to provide a positive key, this way a seek
938  * offset of 0 will represent the base of the directory.
939  *
940  * We usually strip bit 0 (set it to 0) in order to provide a consistent
941  * iteration space for collisions.
942  *
943  * This function can never return 0.  We use the MSB-0 space to synthesize
944  * artificial directory entries such as "." and "..".
945  */
946 int64_t
947 hammer_directory_namekey(hammer_inode_t dip, const void *name, int len,
948 			 u_int32_t *max_iterationsp)
949 {
950 	const char *aname = name;
951 	int32_t crcx;
952 	int64_t key;
953 	int i;
954 	int j;
955 
956 	switch (dip->ino_data.cap_flags & HAMMER_INODE_CAP_DIRHASH_MASK) {
957 	case HAMMER_INODE_CAP_DIRHASH_ALG0:
958 		/*
959 		 * Original algorithm
960 		 */
961 		key = (int64_t)(crc32(aname, len) & 0x7FFFFFFF) << 32;
962 		if (key == 0)
963 			key |= 0x100000000LL;
964 		*max_iterationsp = 0xFFFFFFFFU;
965 		break;
966 	case HAMMER_INODE_CAP_DIRHASH_ALG1:
967 		/*
968 		 * Filesystem version 6 or better will create directories
969 		 * using the ALG1 dirhash.  This hash breaks the filename
970 		 * up into domains separated by special characters and
971 		 * hashes each domain independently.
972 		 *
973 		 * We also do a simple sub-sort using the first character
974 		 * of the filename in the top 5-bits.
975 		 */
976 		key = 0;
977 
978 		/*
979 		 * m32
980 		 */
981 		crcx = 0;
982 		for (i = j = 0; i < len; ++i) {
983 			if (aname[i] == '.' ||
984 			    aname[i] == '-' ||
985 			    aname[i] == '_' ||
986 			    aname[i] == '~') {
987 				if (i != j)
988 					crcx += crc32(aname + j, i - j);
989 				j = i + 1;
990 			}
991 		}
992 		if (i != j)
993 			crcx += crc32(aname + j, i - j);
994 
995 #if 0
996 		/*
997 		 * xor top 5 bits 0mmmm into low bits and steal the top 5
998 		 * bits as a semi sub sort using the first character of
999 		 * the filename.  bit 63 is always left as 0 so directory
1000 		 * keys are positive numbers.
1001 		 */
1002 		crcx ^= (uint32_t)crcx >> (32 - 5);
1003 		crcx = (crcx & 0x07FFFFFF) | ((aname[0] & 0x0F) << (32 - 5));
1004 #endif
1005 		crcx &= 0x7FFFFFFFU;
1006 
1007 		key |= (uint64_t)crcx << 32;
1008 
1009 		/*
1010 		 * l16 - crc of entire filename
1011 		 *
1012 		 * This crc reduces degenerate hash collision conditions
1013 		 */
1014 		crcx = crc32(aname, len);
1015 		crcx = crcx ^ (crcx << 16);
1016 		key |= crcx & 0xFFFF0000U;
1017 
1018 		/*
1019 		 * Cleanup
1020 		 */
1021 		if ((key & 0xFFFFFFFF00000000LL) == 0)
1022 			key |= 0x100000000LL;
1023 		if (hammer_debug_general & 0x0400) {
1024 			kprintf("namekey2: 0x%016llx %*.*s\n",
1025 				(long long)key, len, len, aname);
1026 		}
1027 		*max_iterationsp = 0x00FFFFFF;
1028 		break;
1029 	case HAMMER_INODE_CAP_DIRHASH_ALG2:
1030 	case HAMMER_INODE_CAP_DIRHASH_ALG3:
1031 	default:
1032 		key = 0;			/* compiler warning */
1033 		*max_iterationsp = 1;		/* sanity */
1034 		panic("hammer_directory_namekey: bad algorithm %p", dip);
1035 		break;
1036 	}
1037 	return(key);
1038 }
1039 
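/*
 * Illustrative sketch (not part of the original file): how a directory
 * key is typically generated and consumed.  "dip", "name" and "nlen" are
 * hypothetical locals.  For ALG1, "foo.txt" contributes
 * crc32("foo") + crc32("txt") to the m32 field, so renaming
 * "foo.tmp" -> "foo" only drops the crc32("tmp") term from m32 instead
 * of producing an unrelated key.
 */
#if 0
	int64_t namekey;
	u_int32_t max_iterations;

	namekey = hammer_directory_namekey(dip, name, nlen, &max_iterations);
	/* scan at most max_iterations low-order key values for a free slot */
#endif
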
1040 /*
1041  * Convert string after @@ (@@ not included) to TID.  Returns 0 on success,
1042  * EINVAL on failure.
1043  *
1044  * If this function fails *ispfs, *tidp, and *localizationp will not
1045  * be modified.
1046  */
1047 int
1048 hammer_str_to_tid(const char *str, int *ispfsp,
1049 		  hammer_tid_t *tidp, u_int32_t *localizationp)
1050 {
1051 	hammer_tid_t tid;
1052 	u_int32_t localization;
1053 	char *ptr;
1054 	int ispfs;
1055 	int n;
1056 
1057 	/*
1058 	 * Forms allowed for TID:  "0x%016llx"
1059 	 *			   "-1"
1060 	 */
1061 	tid = strtouq(str, &ptr, 0);
1062 	n = ptr - str;
1063 	if (n == 2 && str[0] == '-' && str[1] == '1') {
1064 		/* ok */
1065 	} else if (n == 18 && str[0] == '0' && (str[1] | 0x20) == 'x') {
1066 		/* ok */
1067 	} else {
1068 		return(EINVAL);
1069 	}
1070 
1071 	/*
1072 	 * Forms allowed for PFS:  ":%05d"  (i.e. "...:0" would be illegal).
1073 	 */
1074 	str = ptr;
1075 	if (*str == ':') {
1076 		localization = strtoul(str + 1, &ptr, 10) << 16;
1077 		if (ptr - str != 6)
1078 			return(EINVAL);
1079 		str = ptr;
1080 		ispfs = 1;
1081 	} else {
1082 		localization = *localizationp;
1083 		ispfs = 0;
1084 	}
1085 
1086 	/*
1087 	 * Any trailing junk invalidates special extension handling.
1088 	 */
1089 	if (*str)
1090 		return(EINVAL);
1091 	*tidp = tid;
1092 	*localizationp = localization;
1093 	*ispfsp = ispfs;
1094 	return(0);
1095 }
1096 
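/*
 * Illustrative sketch (not part of the original file): parsing the part
 * of a snapshot-style name that follows "@@".  The TID string shown is
 * hypothetical but well-formed ("0x" plus 16 hex digits, optional
 * 5-digit PFS suffix).
 */
#if 0
	hammer_tid_t tid;
	u_int32_t localization = 0;
	int ispfs;

	if (hammer_str_to_tid("0x00000001061a8ba0:00001",
			      &ispfs, &tid, &localization) == 0) {
		/* tid = 0x00000001061a8ba0, localization = 1 << 16, ispfs = 1 */
	}
#endif
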
1097 void
1098 hammer_crc_set_blockmap(hammer_blockmap_t blockmap)
1099 {
1100 	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
1101 }
1102 
1103 void
1104 hammer_crc_set_volume(hammer_volume_ondisk_t ondisk)
1105 {
1106 	ondisk->vol_crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
1107 			  crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
1108 }
1109 
1110 int
1111 hammer_crc_test_blockmap(hammer_blockmap_t blockmap)
1112 {
1113 	hammer_crc_t crc;
1114 
1115 	crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
1116 	return (blockmap->entry_crc == crc);
1117 }
1118 
1119 int
1120 hammer_crc_test_volume(hammer_volume_ondisk_t ondisk)
1121 {
1122 	hammer_crc_t crc;
1123 
1124 	crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
1125 	      crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
1126 	return (ondisk->vol_crc == crc);
1127 }
1128 
1129 int
1130 hammer_crc_test_btree(hammer_node_ondisk_t ondisk)
1131 {
1132 	hammer_crc_t crc;
1133 
1134 	crc = crc32(&ondisk->crc + 1, HAMMER_BTREE_CRCSIZE);
1135 	return (ondisk->crc == crc);
1136 }
1137 
1138 /*
1139  * Test or set the leaf->data_crc field.  Deal with any special cases given
1140  * a generic B-Tree leaf element and its data.
1141  *
1142  * NOTE: Inode-data: the atime and mtime fields are not CRCd, allowing them
1143  *       to be updated in-place.
1144  */
1145 int
1146 hammer_crc_test_leaf(void *data, hammer_btree_leaf_elm_t leaf)
1147 {
1148 	hammer_crc_t crc;
1149 
1150 	if (leaf->data_len == 0) {
1151 		crc = 0;
1152 	} else {
1153 		switch(leaf->base.rec_type) {
1154 		case HAMMER_RECTYPE_INODE:
1155 			if (leaf->data_len != sizeof(struct hammer_inode_data))
1156 				return(0);
1157 			crc = crc32(data, HAMMER_INODE_CRCSIZE);
1158 			break;
1159 		default:
1160 			crc = crc32(data, leaf->data_len);
1161 			break;
1162 		}
1163 	}
1164 	return (leaf->data_crc == crc);
1165 }
1166 
1167 void
1168 hammer_crc_set_leaf(void *data, hammer_btree_leaf_elm_t leaf)
1169 {
1170 	if (leaf->data_len == 0) {
1171 		leaf->data_crc = 0;
1172 	} else {
1173 		switch(leaf->base.rec_type) {
1174 		case HAMMER_RECTYPE_INODE:
1175 			KKASSERT(leaf->data_len ==
1176 				  sizeof(struct hammer_inode_data));
1177 			leaf->data_crc = crc32(data, HAMMER_INODE_CRCSIZE);
1178 			break;
1179 		default:
1180 			leaf->data_crc = crc32(data, leaf->data_len);
1181 			break;
1182 		}
1183 	}
1184 }
1185 
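/*
 * Illustrative sketch (not part of the original file): a consumer
 * verifying record data against its B-Tree leaf element.  "leaf" and
 * "data" are hypothetical locals obtained from a cursor lookup.
 */
#if 0
	if (hammer_crc_test_leaf(data, leaf) == 0)
		kprintf("HAMMER: leaf data CRC mismatch\n");
#endif
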
1186 void
1187 hkprintf(const char *ctl, ...)
1188 {
1189 	__va_list va;
1190 
1191 	if (hammer_debug_debug) {
1192 		__va_start(va, ctl);
1193 		kvprintf(ctl, va);
1194 		__va_end(va);
1195 	}
1196 }
1197 
1198 /*
1199  * Return the block size at the specified file offset.
1200  */
1201 int
1202 hammer_blocksize(int64_t file_offset)
1203 {
1204 	if (file_offset < HAMMER_XDEMARC)
1205 		return(HAMMER_BUFSIZE);
1206 	else
1207 		return(HAMMER_XBUFSIZE);
1208 }
1209 
1210 int
1211 hammer_blockoff(int64_t file_offset)
1212 {
1213 	if (file_offset < HAMMER_XDEMARC)
1214 		return((int)file_offset & HAMMER_BUFMASK);
1215 	else
1216 		return((int)file_offset & HAMMER_XBUFMASK);
1217 }
1218 
1219 /*
1220  * Return the demarcation point between the two offsets where
1221  * the block size changes.
1222  */
1223 int64_t
1224 hammer_blockdemarc(int64_t file_offset1, int64_t file_offset2)
1225 {
1226 	if (file_offset1 < HAMMER_XDEMARC) {
1227 		if (file_offset2 <= HAMMER_XDEMARC)
1228 			return(file_offset2);
1229 		return(HAMMER_XDEMARC);
1230 	}
1231 	panic("hammer_blockdemarc: illegal range %lld %lld",
1232 	      (long long)file_offset1, (long long)file_offset2);
1233 }
1234 
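/*
 * Illustrative sketch (not part of the original file): computing the
 * buffer-aligned extent containing a logical file offset using the
 * helpers above.  "file_offset" is a hypothetical local.
 */
#if 0
	int bsize = hammer_blocksize(file_offset);
	int boff  = hammer_blockoff(file_offset);
	int64_t base = file_offset - boff;
	/* the buffer covering file_offset spans [base, base + bsize) */
#endif
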
1235 udev_t
1236 hammer_fsid_to_udev(uuid_t *uuid)
1237 {
1238 	u_int32_t crc;
1239 
1240 	crc = crc32(uuid, sizeof(*uuid));
1241 	return((udev_t)crc);
1242 }
1243 
1244