xref: /dragonfly/sys/vfs/hammer/hammer_subs.c (revision 279dd846)
1 /*
2  * Copyright (c) 2007-2011 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 /*
35  * HAMMER structural locking
36  */
37 
38 #include <sys/dirent.h>
39 
40 #include "hammer.h"
41 
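/*
 * Quick orientation (informal summary of the primitives below):
 *
 * lock->lockval packs the shared/exclusive hold count together with the
 * HAMMER_LOCKF_EXCLUSIVE and HAMMER_LOCKF_WANTED flag bits, while
 * lock->refs carries a separate structural reference count plus the
 * HAMMER_REFS_* interlock flags (LOCKED, WANTED, CHECK).  All state
 * transitions use atomic_cmpset_int() retry loops; tsleep()/wakeup()
 * only come into play when a competing holder forces a wait.
 */
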
42 void
43 hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident)
44 {
45 	thread_t td = curthread;
46 	u_int lv;
47 	u_int nlv;
48 
49 	KKASSERT(lock->refs);
50 	for (;;) {
51 		lv = lock->lockval;
52 
53 		if (lv == 0) {
54 			nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
55 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
56 				lock->lowner = td;
57 				break;
58 			}
59 		} else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
60 			   lock->lowner == td) {
61 			nlv = (lv + 1);
62 			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
63 				break;
64 		} else {
65 			if (hammer_debug_locks) {
66 				kprintf("hammer_lock_ex: held by %p\n",
67 					lock->lowner);
68 			}
69 			nlv = lv | HAMMER_LOCKF_WANTED;
70 			++hammer_contention_count;
71 			tsleep_interlock(&lock->lockval, 0);
72 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
73 				tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
74 				if (hammer_debug_locks)
75 					kprintf("hammer_lock_ex: try again\n");
76 			}
77 		}
78 	}
79 }
80 
81 /*
82  * Try to obtain an exclusive lock
83  */
84 int
85 hammer_lock_ex_try(struct hammer_lock *lock)
86 {
87 	thread_t td = curthread;
88 	int error;
89 	u_int lv;
90 	u_int nlv;
91 
92 	KKASSERT(lock->refs);
93 	for (;;) {
94 		lv = lock->lockval;
95 
96 		if (lv == 0) {
97 			nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
98 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
99 				lock->lowner = td;
100 				error = 0;
101 				break;
102 			}
103 		} else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
104 			   lock->lowner == td) {
105 			nlv = (lv + 1);
106 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
107 				error = 0;
108 				break;
109 			}
110 		} else {
111 			error = EAGAIN;
112 			break;
113 		}
114 	}
115 	return (error);
116 }
117 
118 /*
119  * Obtain a shared lock
120  *
121  * We do not give pending exclusive locks priority over shared locks as
122  * doing so could lead to a deadlock.
123  */
124 void
125 hammer_lock_sh(struct hammer_lock *lock)
126 {
127 	thread_t td = curthread;
128 	u_int lv;
129 	u_int nlv;
130 	const char *ident = "hmrlck";
131 
132 	KKASSERT(lock->refs);
133 	for (;;) {
134 		lv = lock->lockval;
135 
136 		if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
137 			nlv = (lv + 1);
138 			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
139 				break;
140 		} else if (lock->lowner == td) {
141 			/*
142 			 * Disallowed case, drop into kernel debugger for
143 			 * now.  A 'cont' continues with an exclusive lock.
144 			 */
145 			nlv = (lv + 1);
146 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
147 				if (hammer_debug_critical)
148 					Debugger("hammer_lock_sh: holding ex");
149 				break;
150 			}
151 		} else {
152 			nlv = lv | HAMMER_LOCKF_WANTED;
153 			++hammer_contention_count;
154 			tsleep_interlock(&lock->lockval, 0);
155 			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
156 				tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
157 		}
158 	}
159 }
160 
161 int
162 hammer_lock_sh_try(struct hammer_lock *lock)
163 {
164 	thread_t td = curthread;
165 	u_int lv;
166 	u_int nlv;
167 	int error;
168 
169 	KKASSERT(lock->refs);
170 	for (;;) {
171 		lv = lock->lockval;
172 
173 		if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
174 			nlv = (lv + 1);
175 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
176 				error = 0;
177 				break;
178 			}
179 		} else if (lock->lowner == td) {
180 			/*
181 			 * Disallowed case, drop into kernel debugger for
182 			 * now.  A 'cont' continues with an exclusive lock.
183 			 */
184 			nlv = (lv + 1);
185 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
186 				if (hammer_debug_critical)
187 					Debugger("hammer_lock_sh: holding ex");
188 				error = 0;
189 				break;
190 			}
191 		} else {
192 			error = EAGAIN;
193 			break;
194 		}
195 	}
196 	return (error);
197 }
198 
199 /*
200  * Upgrade a shared lock to an exclusively held lock.  This function will
201  * return EDEADLK if there is more than one shared holder.
202  *
203  * No error occurs and no action is taken if the lock is already exclusively
204  * held by the caller.  If the lock is not held at all or held exclusively
205  * by someone else, this function will panic.
206  */
207 int
208 hammer_lock_upgrade(struct hammer_lock *lock, int shcount)
209 {
210 	thread_t td = curthread;
211 	u_int lv;
212 	u_int nlv;
213 	int error;
214 
215 	for (;;) {
216 		lv = lock->lockval;
217 
218 		if ((lv & ~HAMMER_LOCKF_WANTED) == shcount) {
219 			nlv = lv | HAMMER_LOCKF_EXCLUSIVE;
220 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
221 				lock->lowner = td;
222 				error = 0;
223 				break;
224 			}
225 		} else if (lv & HAMMER_LOCKF_EXCLUSIVE) {
226 			if (lock->lowner != curthread)
227 				panic("hammer_lock_upgrade: illegal state");
228 			error = 0;
229 			break;
230 		} else if ((lv & ~HAMMER_LOCKF_WANTED) == 0) {
231 			panic("hammer_lock_upgrade: lock is not held");
232 			/* NOT REACHED */
233 			error = EDEADLK;
234 			break;
235 		} else {
236 			error = EDEADLK;
237 			break;
238 		}
239 	}
240 	return (error);
241 }
242 
243 /*
244  * Downgrade an exclusively held lock to a shared lock.
245  */
246 void
247 hammer_lock_downgrade(struct hammer_lock *lock, int shcount)
248 {
249 	thread_t td __debugvar = curthread;
250 	u_int lv;
251 	u_int nlv;
252 
253 	KKASSERT((lock->lockval & ~HAMMER_LOCKF_WANTED) ==
254 		 (HAMMER_LOCKF_EXCLUSIVE | shcount));
255 	KKASSERT(lock->lowner == td);
256 
257 	/*
258 	 * NOTE: Must clear owner before releasing exclusivity
259 	 */
260 	lock->lowner = NULL;
261 
262 	for (;;) {
263 		lv = lock->lockval;
264 		nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
265 		if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
266 			if (lv & HAMMER_LOCKF_WANTED)
267 				wakeup(&lock->lockval);
268 			break;
269 		}
270 	}
271 }
272 
273 void
274 hammer_unlock(struct hammer_lock *lock)
275 {
276 	thread_t td __debugvar = curthread;
277 	u_int lv;
278 	u_int nlv;
279 
280 	lv = lock->lockval;
281 	KKASSERT(lv != 0);
282 	if (lv & HAMMER_LOCKF_EXCLUSIVE)
283 		KKASSERT(lock->lowner == td);
284 
285 	for (;;) {
286 		lv = lock->lockval;
287 		nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
288 		if (nlv > 1) {
289 			nlv = lv - 1;
290 			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
291 				break;
292 		} else if (nlv == 1) {
293 			nlv = 0;
294 			if (lv & HAMMER_LOCKF_EXCLUSIVE)
295 				lock->lowner = NULL;
296 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
297 				if (lv & HAMMER_LOCKF_WANTED)
298 					wakeup(&lock->lockval);
299 				break;
300 			}
301 		} else {
302 			panic("hammer_unlock: lock %p is not held", lock);
303 		}
304 	}
305 }
306 
307 /*
308  * The calling thread must be holding a shared or exclusive lock.
309  * Returns < 0 if the lock is held shared, and > 0 if held exclusively.
310  */
311 int
312 hammer_lock_status(struct hammer_lock *lock)
313 {
314 	u_int lv = lock->lockval;
315 
316 	if (lv & HAMMER_LOCKF_EXCLUSIVE)
317 		return(1);
318 	else if (lv)
319 		return(-1);
320 	panic("hammer_lock_status: lock must be held: %p", lock);
321 }
322 
323 /*
324  * Bump the ref count for a lock (not the excl/share count, but a separate
325  * structural reference count).  The CHECK flag will be set on a 0->1
326  * transition.
327  *
328  * This function does nothing to serialize races between multiple threads.
329  * The caller can interlock it later on to deal with serialization.
330  *
331  * MPSAFE
332  */
333 void
334 hammer_ref(struct hammer_lock *lock)
335 {
336 	u_int lv;
337 	u_int nlv;
338 
339 	for (;;) {
340 		lv = lock->refs;
341 		if ((lv & ~HAMMER_REFS_FLAGS) == 0) {
342 			nlv = (lv + 1) | HAMMER_REFS_CHECK;
343 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
344 				return;
345 		} else {
346 			nlv = (lv + 1);
347 			KKASSERT((int)nlv > 0);
348 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
349 				return;
350 		}
351 	}
352 	/* not reached */
353 }
354 
355 /*
356  * Drop the ref count for a lock (not the excl/share count, but a separate
357  * structural reference count).  The CHECK flag will be cleared on a 1->0
358  * transition.
359  *
360  * This function does nothing to serialize races between multiple threads.
361  *
362  * MPSAFE
363  */
364 void
365 hammer_rel(struct hammer_lock *lock)
366 {
367 	u_int lv;
368 	u_int nlv;
369 
370 	for (;;) {
371 		lv = lock->refs;
372 		if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
373 			nlv = (lv - 1) & ~HAMMER_REFS_CHECK;
374 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
375 				return;
376 		} else {
377 			KKASSERT((int)lv > 0);
378 			nlv = (lv - 1);
379 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
380 				return;
381 		}
382 	}
383 	/* not reached */
384 }
385 
386 /*
387  * The hammer_*_interlock() and hammer_*_interlock_done() functions are
388  * more sophisticated versions which handle MP transition races and block
389  * when necessary.
390  *
391  * hammer_ref_interlock() bumps the ref-count and conditionally acquires
392  * the interlock for 0->1 transitions or if the CHECK is found to be set.
393  *
394  * This case will return 1, the interlock will be held, and the CHECK
395  * bit also set.  Other threads attempting to ref will see the CHECK bit
396  * and block until we clean up.
397  *
398  * 0 is returned for transitions other than 0->1 when the CHECK bit
399  * is not found to be set, or if the function loses the race with another
400  * thread.
401  *
402  * 1 is only returned to one thread and the others will block.
403  * Effectively a 1 indicator means 'someone transitioned 0->1
404  * and you are the first guy to successfully lock it after that, so you
405  * need to check'.  Due to races the ref-count may be greater than 1 upon
406  * return.
407  *
408  * MPSAFE
409  */
410 int
411 hammer_ref_interlock(struct hammer_lock *lock)
412 {
413 	u_int lv;
414 	u_int nlv;
415 
416 	/*
417 	 * Integrated reference count bump, lock, and check, with hot-path.
418 	 *
419 	 * (a) Return 1	(+LOCKED, +CHECK)	0->1 transition
420 	 * (b) Return 0 (-LOCKED, -CHECK)	N->N+1 transition
421 	 * (c) Break out (+CHECK)		Check condition, cannot lock
422 	 * (d) Return 1 (+LOCKED, +CHECK)	Successfully locked
423 	 */
424 	for (;;) {
425 		lv = lock->refs;
426 		if (lv == 0) {
427 			nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
428 			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
429 				lock->rowner = curthread;
430 				return(1);
431 			}
432 		} else {
433 			nlv = (lv + 1);
434 			if ((lv & ~HAMMER_REFS_FLAGS) == 0)
435 				nlv |= HAMMER_REFS_CHECK;
436 			if ((nlv & HAMMER_REFS_CHECK) == 0) {
437 				if (atomic_cmpset_int(&lock->refs, lv, nlv))
438 					return(0);
439 			} else if (lv & HAMMER_REFS_LOCKED) {
440 				/* CHECK also set here */
441 				if (atomic_cmpset_int(&lock->refs, lv, nlv))
442 					break;
443 			} else {
444 				/* CHECK also set here */
445 				nlv |= HAMMER_REFS_LOCKED;
446 				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
447 					lock->rowner = curthread;
448 					return(1);
449 				}
450 			}
451 		}
452 	}
453 
454 	/*
455 	 * Deferred check condition because we were unable to acquire the
456 	 * lock.  We must block until the check condition is cleared due
457 	 * to a race with another thread, or we are able to acquire the
458 	 * lock.
459 	 *
460 	 * (a) Return 0	(-CHECK)		Another thread handled it
461 	 * (b) Return 1 (+LOCKED, +CHECK)	We handled it.
462 	 */
463 	for (;;) {
464 		lv = lock->refs;
465 		if ((lv & HAMMER_REFS_CHECK) == 0)
466 			return(0);
467 		if (lv & HAMMER_REFS_LOCKED) {
468 			tsleep_interlock(&lock->refs, 0);
469 			nlv = (lv | HAMMER_REFS_WANTED);
470 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
471 				tsleep(&lock->refs, PINTERLOCKED, "h1lk", 0);
472 		} else {
473 			/* CHECK also set here */
474 			nlv = lv | HAMMER_REFS_LOCKED;
475 			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
476 				lock->rowner = curthread;
477 				return(1);
478 			}
479 		}
480 	}
481 	/* not reached */
482 }
483 
484 /*
485  * This is the same as hammer_ref_interlock() but asserts that the
486  * 0->1 transition is always true, thus the lock must have no references
487  * on entry or have CHECK set, and will have one reference with the
488  * interlock held on return.  It must also not be interlocked on entry
489  * by anyone.
490  *
491  * NOTE that CHECK will never be found set when the ref-count is 0.
492  *
493  * 1 is always returned to match the API for hammer_ref_interlock().
494  * This function returns with one ref, the lock held, and the CHECK bit set.
495  */
496 int
497 hammer_ref_interlock_true(struct hammer_lock *lock)
498 {
499 	u_int lv;
500 	u_int nlv;
501 
502 	for (;;) {
503 		lv = lock->refs;
504 
505 		if (lv) {
506 			panic("hammer_ref_interlock_true: bad lock %p %08x",
507 			      lock, lock->refs);
508 		}
509 		nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
510 		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
511 			lock->rowner = curthread;
512 			return (1);
513 		}
514 	}
515 }
516 
517 /*
518  * Unlock the interlock acquired by hammer_ref_interlock() and clear the
519  * CHECK flag.  The ref-count remains unchanged.
520  *
521  * This routine is called in the load path when the load succeeds.
522  */
523 void
524 hammer_ref_interlock_done(struct hammer_lock *lock)
525 {
526 	u_int lv;
527 	u_int nlv;
528 
529 	for (;;) {
530 		lv = lock->refs;
531 		nlv = lv & ~HAMMER_REFS_FLAGS;
532 		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
533 			if (lv & HAMMER_REFS_WANTED)
534 				wakeup(&lock->refs);
535 			break;
536 		}
537 	}
538 }
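
/*
 * Illustrative load-path sketch (hypothetical caller; the helper name
 * load_the_structure() is assumed for the example):
 *
 *	if (hammer_ref_interlock(&ip->lock)) {
 *		load_the_structure(ip);
 *		hammer_ref_interlock_done(&ip->lock);
 *	}
 *
 * Only the thread that receives 1 performs the load; concurrent
 * referencers observing CHECK block inside hammer_ref_interlock() until
 * the flag is cleared and then return 0.
 */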
539 
540 /*
541  * hammer_rel_interlock() works a bit differently in that it must
542  * acquire the lock in tandem with a 1->0 transition.  CHECK is
543  * not used.
544  *
545  * 1 is returned on 1->0 transitions with the lock held on return
546  * and 0 is returned otherwise with the lock not held.
547  *
548  * It is important to note that the refs are not stable and may
549  * increase while we hold the lock; the 1 indication only means
550  * that we transitioned 1->0, not necessarily that we stayed at 0.
551  *
552  * Another thread bumping refs while we hold the lock will set CHECK,
553  * causing one of the competing hammer_ref_interlock() calls to
554  * return 1 after we release our lock.
555  *
556  * MPSAFE
557  */
558 int
559 hammer_rel_interlock(struct hammer_lock *lock, int locked)
560 {
561 	u_int lv;
562 	u_int nlv;
563 
564 	/*
565 	 * In locked mode (failure/unload path) we release the
566 	 * ref-count but leave it locked.
567 	 */
568 	if (locked) {
569 		hammer_rel(lock);
570 		return(1);
571 	}
572 
573 	/*
574 	 * Integrated reference count drop with LOCKED, plus the hot-path
575 	 * returns.
576 	 */
577 	for (;;) {
578 		lv = lock->refs;
579 
580 		if (lv == 1) {
581 			nlv = 0 | HAMMER_REFS_LOCKED;
582 			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
583 				lock->rowner = curthread;
584 				return(1);
585 			}
586 		} else if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
587 			if ((lv & HAMMER_REFS_LOCKED) == 0) {
588 				nlv = (lv - 1) | HAMMER_REFS_LOCKED;
589 				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
590 					lock->rowner = curthread;
591 					return(1);
592 				}
593 			} else {
594 				nlv = lv | HAMMER_REFS_WANTED;
595 				tsleep_interlock(&lock->refs, 0);
596 				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
597 					tsleep(&lock->refs, PINTERLOCKED,
598 					       "h0lk", 0);
599 				}
600 			}
601 		} else {
602 			nlv = (lv - 1);
603 			KKASSERT((int)nlv >= 0);
604 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
605 				return(0);
606 		}
607 	}
608 	/* not reached */
609 }
610 
611 /*
612  * Unlock the interlock acquired by hammer_rel_interlock().
613  *
614  * If orig_locked is non-zero the interlock was originally held prior to
615  * the hammer_rel_interlock() call and passed through to us.  In this
616  * case we want to retain the CHECK error state if not transitioning
617  * to 0.
618  *
619  * The code is the same either way so we do not have to conditionalize
620  * on orig_locked.
621  */
622 void
623 hammer_rel_interlock_done(struct hammer_lock *lock, int orig_locked __unused)
624 {
625 	u_int lv;
626 	u_int nlv;
627 
628 	for (;;) {
629 		lv = lock->refs;
630 		nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);
631 		if ((lv & ~HAMMER_REFS_FLAGS) == 0)
632 			nlv &= ~HAMMER_REFS_CHECK;
633 		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
634 			if (lv & HAMMER_REFS_WANTED)
635 				wakeup(&lock->refs);
636 			break;
637 		}
638 	}
639 }
640 
641 /*
642  * Acquire the interlock on lock->refs.
643  *
644  * Return 1 if CHECK is currently set.  Note that CHECK will not
645  * be set if the reference count is 0, but can get set if this function
646  * is preceded by, say, hammer_ref(), or through races with other
647  * threads.  The return value allows the caller to use the same logic
648  * as hammer_ref_interlock().
649  *
650  * MPSAFE
651  */
652 int
653 hammer_get_interlock(struct hammer_lock *lock)
654 {
655 	u_int lv;
656 	u_int nlv;
657 
658 	for (;;) {
659 		lv = lock->refs;
660 		if (lv & HAMMER_REFS_LOCKED) {
661 			nlv = lv | HAMMER_REFS_WANTED;
662 			tsleep_interlock(&lock->refs, 0);
663 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
664 				tsleep(&lock->refs, PINTERLOCKED, "hilk", 0);
665 		} else {
666 			nlv = (lv | HAMMER_REFS_LOCKED);
667 			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
668 				lock->rowner = curthread;
669 				return((lv & HAMMER_REFS_CHECK) ? 1 : 0);
670 			}
671 		}
672 	}
673 }
674 
675 /*
676  * Attempt to acquire the interlock and expect 0 refs.  Used by the buffer
677  * cache callback code to disassociate or lock the bufs related to HAMMER
678  * structures.
679  *
680  * During teardown the related bp will be acquired by hammer_io_release()
681  * which interlocks our test.
682  *
683  * Returns non-zero on success, zero on failure.
684  */
685 int
686 hammer_try_interlock_norefs(struct hammer_lock *lock)
687 {
688 	u_int lv;
689 	u_int nlv;
690 
691 	for (;;) {
692 		lv = lock->refs;
693 		if (lv == 0) {
694 			nlv = lv | HAMMER_REFS_LOCKED;
695 			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
696 				lock->rowner = curthread;
697 				return(1);
698 			}
699 		} else {
700 			return(0);
701 		}
702 	}
703 	/* not reached */
704 }
705 
706 /*
707  * Release the interlock on lock->refs.  This function will set
708  * CHECK if the refs is non-zero and error is non-zero, and clear
709  * CHECK otherwise.
710  *
711  * MPSAFE
712  */
713 void
714 hammer_put_interlock(struct hammer_lock *lock, int error)
715 {
716 	u_int lv;
717 	u_int nlv;
718 
719 	for (;;) {
720 		lv = lock->refs;
721 		KKASSERT(lv & HAMMER_REFS_LOCKED);
722 		nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);
723 
724 		if ((nlv & ~HAMMER_REFS_FLAGS) == 0 || error == 0)
725 			nlv &= ~HAMMER_REFS_CHECK;
726 		else
727 			nlv |= HAMMER_REFS_CHECK;
728 
729 		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
730 			if (lv & HAMMER_REFS_WANTED)
731 				wakeup(&lock->refs);
732 			return;
733 		}
734 	}
735 }
736 
737 /*
738  * The sync_lock must be held when doing any modifying operations on
739  * meta-data.  It does not have to be held when modifying non-meta-data buffers
740  * (backend or frontend).
741  *
742  * The flusher holds the lock exclusively while all other consumers hold it
743  * shared.  All modifying operations made while holding the lock are atomic
744  * in that they will be made part of the same flush group.
745  *
746  * Due to the atomicity requirement, deadlock recovery code CANNOT release the
747  * sync lock, nor can we give pending exclusive sync locks priority over
748  * a shared sync lock as this could lead to a 3-way deadlock.
749  */
750 void
751 hammer_sync_lock_ex(hammer_transaction_t trans)
752 {
753 	++trans->sync_lock_refs;
754 	hammer_lock_ex(&trans->hmp->sync_lock);
755 }
756 
757 void
758 hammer_sync_lock_sh(hammer_transaction_t trans)
759 {
760 	++trans->sync_lock_refs;
761 	hammer_lock_sh(&trans->hmp->sync_lock);
762 }
763 
764 int
765 hammer_sync_lock_sh_try(hammer_transaction_t trans)
766 {
767 	int error;
768 
769 	++trans->sync_lock_refs;
770 	if ((error = hammer_lock_sh_try(&trans->hmp->sync_lock)) != 0)
771 		--trans->sync_lock_refs;
772 	return (error);
773 }
774 
775 void
776 hammer_sync_unlock(hammer_transaction_t trans)
777 {
778 	--trans->sync_lock_refs;
779 	hammer_unlock(&trans->hmp->sync_lock);
780 }
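
/*
 * Typical usage sketch (hypothetical caller): non-flusher code wraps its
 * meta-data modifications so they become part of a single flush group:
 *
 *	hammer_sync_lock_sh(trans);
 *	... modify meta-data ...
 *	hammer_sync_unlock(trans);
 *
 * The flusher takes the same lock exclusively via hammer_sync_lock_ex().
 */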
781 
782 /*
783  * Misc
784  */
785 u_int32_t
786 hammer_to_unix_xid(uuid_t *uuid)
787 {
788 	return(*(u_int32_t *)&uuid->node[2]);
789 }
790 
791 void
792 hammer_guid_to_uuid(uuid_t *uuid, u_int32_t guid)
793 {
794 	bzero(uuid, sizeof(*uuid));
795 	*(u_int32_t *)&uuid->node[2] = guid;
796 }
797 
798 void
799 hammer_time_to_timespec(u_int64_t xtime, struct timespec *ts)
800 {
801 	ts->tv_sec = (unsigned long)(xtime / 1000000);
802 	ts->tv_nsec = (unsigned int)(xtime % 1000000) * 1000L;
803 }
804 
805 u_int64_t
806 hammer_timespec_to_time(struct timespec *ts)
807 {
808 	u_int64_t xtime;
809 
810 	xtime = (unsigned)(ts->tv_nsec / 1000) +
811 		(unsigned long)ts->tv_sec * 1000000ULL;
812 	return(xtime);
813 }
814 
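/*
 * The conversions above treat the 64 bit HAMMER time as a count of
 * microseconds (e.g. xtime 1000001 maps to tv_sec 1, tv_nsec 1000).
 */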
815 
816 /*
817  * Convert a HAMMER filesystem object type to a vnode type
818  */
819 enum vtype
820 hammer_get_vnode_type(u_int8_t obj_type)
821 {
822 	switch(obj_type) {
823 	case HAMMER_OBJTYPE_DIRECTORY:
824 		return(VDIR);
825 	case HAMMER_OBJTYPE_REGFILE:
826 		return(VREG);
827 	case HAMMER_OBJTYPE_DBFILE:
828 		return(VDATABASE);
829 	case HAMMER_OBJTYPE_FIFO:
830 		return(VFIFO);
831 	case HAMMER_OBJTYPE_SOCKET:
832 		return(VSOCK);
833 	case HAMMER_OBJTYPE_CDEV:
834 		return(VCHR);
835 	case HAMMER_OBJTYPE_BDEV:
836 		return(VBLK);
837 	case HAMMER_OBJTYPE_SOFTLINK:
838 		return(VLNK);
839 	default:
840 		return(VBAD);
841 	}
842 	/* not reached */
843 }
844 
845 int
846 hammer_get_dtype(u_int8_t obj_type)
847 {
848 	switch(obj_type) {
849 	case HAMMER_OBJTYPE_DIRECTORY:
850 		return(DT_DIR);
851 	case HAMMER_OBJTYPE_REGFILE:
852 		return(DT_REG);
853 	case HAMMER_OBJTYPE_DBFILE:
854 		return(DT_DBF);
855 	case HAMMER_OBJTYPE_FIFO:
856 		return(DT_FIFO);
857 	case HAMMER_OBJTYPE_SOCKET:
858 		return(DT_SOCK);
859 	case HAMMER_OBJTYPE_CDEV:
860 		return(DT_CHR);
861 	case HAMMER_OBJTYPE_BDEV:
862 		return(DT_BLK);
863 	case HAMMER_OBJTYPE_SOFTLINK:
864 		return(DT_LNK);
865 	default:
866 		return(DT_UNKNOWN);
867 	}
868 	/* not reached */
869 }
870 
871 u_int8_t
872 hammer_get_obj_type(enum vtype vtype)
873 {
874 	switch(vtype) {
875 	case VDIR:
876 		return(HAMMER_OBJTYPE_DIRECTORY);
877 	case VREG:
878 		return(HAMMER_OBJTYPE_REGFILE);
879 	case VDATABASE:
880 		return(HAMMER_OBJTYPE_DBFILE);
881 	case VFIFO:
882 		return(HAMMER_OBJTYPE_FIFO);
883 	case VSOCK:
884 		return(HAMMER_OBJTYPE_SOCKET);
885 	case VCHR:
886 		return(HAMMER_OBJTYPE_CDEV);
887 	case VBLK:
888 		return(HAMMER_OBJTYPE_BDEV);
889 	case VLNK:
890 		return(HAMMER_OBJTYPE_SOFTLINK);
891 	default:
892 		return(HAMMER_OBJTYPE_UNKNOWN);
893 	}
894 	/* not reached */
895 }
896 
897 /*
898  * Return flags for hammer_delete_at_cursor()
899  */
900 int
901 hammer_nohistory(hammer_inode_t ip)
902 {
903 	if (ip->hmp->hflags & HMNT_NOHISTORY)
904 		return(HAMMER_DELETE_DESTROY);
905 	if (ip->ino_data.uflags & (SF_NOHISTORY|UF_NOHISTORY))
906 		return(HAMMER_DELETE_DESTROY);
907 	return(0);
908 }
909 
910 /*
911  * ALGORITHM VERSION 0:
912  *	Return a namekey hash.   The 64 bit namekey hash consists of a 32 bit
913  *	crc in the MSB and 0 in the LSB.  The caller will use the low 32 bits
914  *	to generate a unique key and will scan all entries with the same upper
915  *	32 bits when issuing a lookup.
916  *
917  *	0hhhhhhhhhhhhhhh hhhhhhhhhhhhhhhh 0000000000000000 0000000000000000
918  *
919  * ALGORITHM VERSION 1:
920  *
921  *	This algorithm breaks the filename down into a separate 32-bit crc
922  *	for each filename segment separated by a special character (dot,
923  *	dash, underscore, or tilde).  The CRCs are then added together.
924  *	This allows temporary names.  A full-filename 16 bit crc is also
925  *	generated to deal with degenerate conditions.
926  *
927  *	The algorithm is designed to handle create/rename situations such
928  *	that a create with an extension followed by a rename without the
929  *	extension only shifts the key space rather than randomizing it.
930  *
931  *	NOTE: The inode allocator cache can only match 10 bits so we do
932  *	      not really have any room for a partial sorted name, and
933  *	      numbers don't sort well in that situation anyway.
934  *
935  *	0mmmmmmmmmmmmmmm mmmmmmmmmmmmmmmm llllllllllllllll 0000000000000000
936  *
937  *
938  * We strip bit 63 in order to provide a positive key, this way a seek
939  * offset of 0 will represent the base of the directory.
940  *
941  * We usually strip bit 0 (set it to 0) in order to provide a consistent
942  * iteration space for collisions.
943  *
944  * This function can never return 0.  We use the MSB-0 space to synthesize
945  * artificial directory entries such as "." and "..".
946  */
947 int64_t
948 hammer_directory_namekey(hammer_inode_t dip, const void *name, int len,
949 			 u_int32_t *max_iterationsp)
950 {
951 	const char *aname = name;
952 	int32_t crcx;
953 	int64_t key;
954 	int i;
955 	int j;
956 
957 	switch (dip->ino_data.cap_flags & HAMMER_INODE_CAP_DIRHASH_MASK) {
958 	case HAMMER_INODE_CAP_DIRHASH_ALG0:
959 		/*
960 		 * Original algorithm
961 		 */
962 		key = (int64_t)(crc32(aname, len) & 0x7FFFFFFF) << 32;
963 		if (key == 0)
964 			key |= 0x100000000LL;
965 		*max_iterationsp = 0xFFFFFFFFU;
966 		break;
967 	case HAMMER_INODE_CAP_DIRHASH_ALG1:
968 		/*
969 		 * Filesystem version 6 or better will create directories
970 		 * using the ALG1 dirhash.  This hash breaks the filename
971 		 * up into domains separated by special characters and
972 		 * hashes each domain independently.
973 		 *
974 		 * We also do a simple sub-sort using the first character
975 		 * of the filename in the top 5-bits.
976 		 */
977 		key = 0;
978 
979 		/*
980 		 * m32
981 		 */
982 		crcx = 0;
983 		for (i = j = 0; i < len; ++i) {
984 			if (aname[i] == '.' ||
985 			    aname[i] == '-' ||
986 			    aname[i] == '_' ||
987 			    aname[i] == '~') {
988 				if (i != j)
989 					crcx += crc32(aname + j, i - j);
990 				j = i + 1;
991 			}
992 		}
993 		if (i != j)
994 			crcx += crc32(aname + j, i - j);
995 
996 #if 0
997 		/*
998 		 * xor top 5 bits 0mmmm into low bits and steal the top 5
999 		 * bits as a semi sub sort using the first character of
1000 		 * the filename.  bit 63 is always left as 0 so directory
1001 		 * keys are positive numbers.
1002 		 */
1003 		crcx ^= (uint32_t)crcx >> (32 - 5);
1004 		crcx = (crcx & 0x07FFFFFF) | ((aname[0] & 0x0F) << (32 - 5));
1005 #endif
1006 		crcx &= 0x7FFFFFFFU;
1007 
1008 		key |= (uint64_t)crcx << 32;
1009 
1010 		/*
1011 		 * l16 - crc of entire filename
1012 		 *
1013 		 * This crc reduces degenerate hash collision conditions
1014 		 */
1015 		crcx = crc32(aname, len);
1016 		crcx = crcx ^ (crcx << 16);
1017 		key |= crcx & 0xFFFF0000U;
1018 
1019 		/*
1020 		 * Cleanup
1021 		 */
1022 		if ((key & 0xFFFFFFFF00000000LL) == 0)
1023 			key |= 0x100000000LL;
1024 		if (hammer_debug_general & 0x0400) {
1025 			kprintf("namekey2: 0x%016llx %*.*s\n",
1026 				(long long)key, len, len, aname);
1027 		}
1028 		*max_iterationsp = 0x00FFFFFF;
1029 		break;
1030 	case HAMMER_INODE_CAP_DIRHASH_ALG2:
1031 	case HAMMER_INODE_CAP_DIRHASH_ALG3:
1032 	default:
1033 		key = 0;			/* compiler warning */
1034 		*max_iterationsp = 1;		/* sanity */
1035 		panic("hammer_directory_namekey: bad algorithm %p", dip);
1036 		break;
1037 	}
1038 	return(key);
1039 }
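
/*
 * Illustrative ALG1 example: for the name "archive.tar.gz" the m32 field
 * is crc32("archive") + crc32("tar") + crc32("gz"), masked to 31 bits,
 * and the l16 field xor-folds crc32("archive.tar.gz") into bits 16-31 of
 * the key.  Names that differ in only one segment therefore shift the
 * upper key bits rather than fully re-randomizing them.
 */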
1040 
1041 /*
1042  * Convert string after @@ (@@ not included) to TID.  Returns 0 on success,
1043  * EINVAL on failure.
1044  *
1045  * If this function fails *ispfs, *tidp, and *localizationp will not
1046  * be modified.
1047  */
1048 int
1049 hammer_str_to_tid(const char *str, int *ispfsp,
1050 		  hammer_tid_t *tidp, u_int32_t *localizationp)
1051 {
1052 	hammer_tid_t tid;
1053 	u_int32_t localization;
1054 	char *ptr;
1055 	int ispfs;
1056 	int n;
1057 
1058 	/*
1059 	 * Forms allowed for TID:  "0x%016llx"
1060 	 *			   "-1"
1061 	 */
1062 	tid = strtouq(str, &ptr, 0);
1063 	n = ptr - str;
1064 	if (n == 2 && str[0] == '-' && str[1] == '1') {
1065 		/* ok */
1066 	} else if (n == 18 && str[0] == '0' && (str[1] | 0x20) == 'x') {
1067 		/* ok */
1068 	} else {
1069 		return(EINVAL);
1070 	}
1071 
1072 	/*
1073 	 * Forms allowed for PFS:  ":%05d"  (i.e. "...:0" would be illegal).
1074 	 */
1075 	str = ptr;
1076 	if (*str == ':') {
1077 		localization = strtoul(str + 1, &ptr, 10) << 16;
1078 		if (ptr - str != 6)
1079 			return(EINVAL);
1080 		str = ptr;
1081 		ispfs = 1;
1082 	} else {
1083 		localization = *localizationp;
1084 		ispfs = 0;
1085 	}
1086 
1087 	/*
1088 	 * Any trailing junk invalidates special extension handling.
1089 	 */
1090 	if (*str)
1091 		return(EINVAL);
1092 	*tidp = tid;
1093 	*localizationp = localization;
1094 	*ispfsp = ispfs;
1095 	return(0);
1096 }
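
/*
 * Illustrative hammer_str_to_tid() inputs (values chosen arbitrarily):
 *
 *	"0x0000000123456789"		tid 0x123456789, ispfs 0
 *	"0x0000000123456789:00001"	tid 0x123456789, localization
 *					1 << 16, ispfs 1
 *	"-1"				tid of all ones (maximal TID)
 */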
1097 
1098 void
1099 hammer_crc_set_blockmap(hammer_blockmap_t blockmap)
1100 {
1101 	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
1102 }
1103 
1104 void
1105 hammer_crc_set_volume(hammer_volume_ondisk_t ondisk)
1106 {
1107 	ondisk->vol_crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
1108 			  crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
1109 }
1110 
1111 int
1112 hammer_crc_test_blockmap(hammer_blockmap_t blockmap)
1113 {
1114 	hammer_crc_t crc;
1115 
1116 	crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
1117 	return (blockmap->entry_crc == crc);
1118 }
1119 
1120 int
1121 hammer_crc_test_volume(hammer_volume_ondisk_t ondisk)
1122 {
1123 	hammer_crc_t crc;
1124 
1125 	crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
1126 	      crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
1127 	return (ondisk->vol_crc == crc);
1128 }
1129 
1130 int
1131 hammer_crc_test_btree(hammer_node_ondisk_t ondisk)
1132 {
1133 	hammer_crc_t crc;
1134 
1135 	crc = crc32(&ondisk->crc + 1, HAMMER_BTREE_CRCSIZE);
1136 	return (ondisk->crc == crc);
1137 }
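
/*
 * In the volume and B-Tree helpers above the crc field itself is skipped:
 * the volume CRC xors a leading span with a span starting just past
 * vol_crc, and the B-Tree node CRC covers the node starting just past
 * its crc field.
 */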
1138 
1139 /*
1140  * Test or set the leaf->data_crc field.  Deal with any special cases given
1141  * a generic B-Tree leaf element and its data.
1142  *
1143  * NOTE: Inode-data: the atime and mtime fields are not CRCd, allowing them
1144  *       to be updated in-place.
1145  */
1146 int
1147 hammer_crc_test_leaf(void *data, hammer_btree_leaf_elm_t leaf)
1148 {
1149 	hammer_crc_t crc;
1150 
1151 	if (leaf->data_len == 0) {
1152 		crc = 0;
1153 	} else {
1154 		switch(leaf->base.rec_type) {
1155 		case HAMMER_RECTYPE_INODE:
1156 			if (leaf->data_len != sizeof(struct hammer_inode_data))
1157 				return(0);
1158 			crc = crc32(data, HAMMER_INODE_CRCSIZE);
1159 			break;
1160 		default:
1161 			crc = crc32(data, leaf->data_len);
1162 			break;
1163 		}
1164 	}
1165 	return (leaf->data_crc == crc);
1166 }
1167 
1168 void
1169 hammer_crc_set_leaf(void *data, hammer_btree_leaf_elm_t leaf)
1170 {
1171 	if (leaf->data_len == 0) {
1172 		leaf->data_crc = 0;
1173 	} else {
1174 		switch(leaf->base.rec_type) {
1175 		case HAMMER_RECTYPE_INODE:
1176 			KKASSERT(leaf->data_len ==
1177 				  sizeof(struct hammer_inode_data));
1178 			leaf->data_crc = crc32(data, HAMMER_INODE_CRCSIZE);
1179 			break;
1180 		default:
1181 			leaf->data_crc = crc32(data, leaf->data_len);
1182 			break;
1183 		}
1184 	}
1185 }
1186 
1187 void
1188 hkprintf(const char *ctl, ...)
1189 {
1190 	__va_list va;
1191 
1192 	if (hammer_debug_debug) {
1193 		__va_start(va, ctl);
1194 		kvprintf(ctl, va);
1195 		__va_end(va);
1196 	}
1197 }
1198 
1199 /*
1200  * Return the block size at the specified file offset.
1201  */
1202 int
1203 hammer_blocksize(int64_t file_offset)
1204 {
1205 	if (file_offset < HAMMER_XDEMARC)
1206 		return(HAMMER_BUFSIZE);
1207 	else
1208 		return(HAMMER_XBUFSIZE);
1209 }
1210 
1211 int
1212 hammer_blockoff(int64_t file_offset)
1213 {
1214 	if (file_offset < HAMMER_XDEMARC)
1215 		return((int)file_offset & HAMMER_BUFMASK);
1216 	else
1217 		return((int)file_offset & HAMMER_XBUFMASK);
1218 }
1219 
1220 /*
1221  * Return the demarcation point between the two offsets where
1222  * the block size changes.
1223  */
1224 int64_t
1225 hammer_blockdemarc(int64_t file_offset1, int64_t file_offset2)
1226 {
1227 	if (file_offset1 < HAMMER_XDEMARC) {
1228 		if (file_offset2 <= HAMMER_XDEMARC)
1229 			return(file_offset2);
1230 		return(HAMMER_XDEMARC);
1231 	}
1232 	panic("hammer_blockdemarc: illegal range %lld %lld",
1233 	      (long long)file_offset1, (long long)file_offset2);
1234 }
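
/*
 * Example: for a range straddling the boundary (file_offset1 below
 * HAMMER_XDEMARC and file_offset2 beyond it) hammer_blockdemarc()
 * returns HAMMER_XDEMARC, letting the caller clip an I/O at the point
 * where the buffer size switches from HAMMER_BUFSIZE to HAMMER_XBUFSIZE.
 */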
1235 
1236 udev_t
1237 hammer_fsid_to_udev(uuid_t *uuid)
1238 {
1239 	u_int32_t crc;
1240 
1241 	crc = crc32(uuid, sizeof(*uuid));
1242 	return((udev_t)crc);
1243 }
1244 
1245