xref: /dragonfly/sys/vfs/hammer/hammer_subs.c (revision dcd37f7d)
1 /*
2  * Copyright (c) 2007-2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/vfs/hammer/hammer_subs.c,v 1.35 2008/10/15 22:38:37 dillon Exp $
35  */
36 /*
37  * HAMMER structural locking
38  */
39 
40 #include "hammer.h"
41 #include <sys/dirent.h>
42 
43 void
44 hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident)
45 {
46 	thread_t td = curthread;
47 	u_int lv;
48 	u_int nlv;
49 
50 	KKASSERT(lock->refs);
51 	for (;;) {
52 		lv = lock->lockval;
53 
54 		if (lv == 0) {
55 			nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
56 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
57 				lock->lowner = td;
58 				break;
59 			}
60 		} else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
61 			   lock->lowner == td) {
62 			nlv = (lv + 1);
63 			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
64 				break;
65 		} else {
66 			if (hammer_debug_locks) {
67 				kprintf("hammer_lock_ex: held by %p\n",
68 					lock->lowner);
69 			}
70 			nlv = lv | HAMMER_LOCKF_WANTED;
71 			++hammer_contention_count;
72 			tsleep_interlock(&lock->lockval, 0);
73 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
74 				tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
75 				if (hammer_debug_locks)
76 					kprintf("hammer_lock_ex: try again\n");
77 			}
78 		}
79 	}
80 }
81 
82 /*
83  * Try to obtain an exclusive lock
84  */
85 int
86 hammer_lock_ex_try(struct hammer_lock *lock)
87 {
88 	thread_t td = curthread;
89 	int error;
90 	u_int lv;
91 	u_int nlv;
92 
93 	KKASSERT(lock->refs);
94 	for (;;) {
95 		lv = lock->lockval;
96 
97 		if (lv == 0) {
98 			nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
99 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
100 				lock->lowner = td;
101 				error = 0;
102 				break;
103 			}
104 		} else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
105 			   lock->lowner == td) {
106 			nlv = (lv + 1);
107 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
108 				error = 0;
109 				break;
110 			}
111 		} else {
112 			error = EAGAIN;
113 			break;
114 		}
115 	}
116 	return (error);
117 }
118 
119 /*
120  * Obtain a shared lock
121  *
122  * We do not give pending exclusive locks priority over shared locks as
123  * doing so could lead to a deadlock.
124  */
125 void
126 hammer_lock_sh(struct hammer_lock *lock)
127 {
128 	thread_t td = curthread;
129 	u_int lv;
130 	u_int nlv;
131 	const char *ident = "hmrlck";
132 
133 	KKASSERT(lock->refs);
134 	for (;;) {
135 		lv = lock->lockval;
136 
137 		if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
138 			nlv = (lv + 1);
139 			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
140 				break;
141 		} else if (lock->lowner == td) {
142 			/*
143 			 * Disallowed case; drop into the kernel debugger for
144 			 * now.  Continuing ('cont') proceeds with an exclusive lock.
145 			 */
146 			nlv = (lv + 1);
147 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
148 				if (hammer_debug_critical)
149 					Debugger("hammer_lock_sh: holding ex");
150 				break;
151 			}
152 		} else {
153 			nlv = lv | HAMMER_LOCKF_WANTED;
154 			++hammer_contention_count;
155 			tsleep_interlock(&lock->lockval, 0);
156 			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
157 				tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
158 		}
159 	}
160 }
161 
162 int
163 hammer_lock_sh_try(struct hammer_lock *lock)
164 {
165 	thread_t td = curthread;
166 	u_int lv;
167 	u_int nlv;
168 	int error;
169 
170 	KKASSERT(lock->refs);
171 	for (;;) {
172 		lv = lock->lockval;
173 
174 		if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
175 			nlv = (lv + 1);
176 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
177 				error = 0;
178 				break;
179 			}
180 		} else if (lock->lowner == td) {
181 			/*
182 			 * Disallowed case; drop into the kernel debugger for
183 			 * now.  Continuing ('cont') proceeds with an exclusive lock.
184 			 */
185 			nlv = (lv + 1);
186 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
187 				if (hammer_debug_critical)
188 					Debugger("hammer_lock_sh: holding ex");
189 				error = 0;
190 				break;
191 			}
192 		} else {
193 			error = EAGAIN;
194 			break;
195 		}
196 	}
197 	return (error);
198 }
199 
200 /*
201  * Upgrade a shared lock to an exclusively held lock.  This function will
202  * return EDEADLK if there is more than one shared holder.
203  *
204  * No error occurs and no action is taken if the lock is already exclusively
205  * held by the caller.  If the lock is not held at all or held exclusively
206  * by someone else, this function will panic.
207  */
208 int
209 hammer_lock_upgrade(struct hammer_lock *lock)
210 {
211 	thread_t td = curthread;
212 	u_int lv;
213 	u_int nlv;
214 	int error;
215 
216 	for (;;) {
217 		lv = lock->lockval;
218 
219 		if ((lv & ~HAMMER_LOCKF_WANTED) == 1) {
220 			nlv = lv | HAMMER_LOCKF_EXCLUSIVE;
221 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
222 				lock->lowner = td;
223 				error = 0;
224 				break;
225 			}
226 		} else if (lv & HAMMER_LOCKF_EXCLUSIVE) {
227 			if (lock->lowner != curthread)
228 				panic("hammer_lock_upgrade: illegal state");
229 			error = 0;
230 			break;
231 		} else if ((lv & ~HAMMER_LOCKF_WANTED) == 0) {
232 			panic("hammer_lock_upgrade: lock is not held");
233 			/* NOT REACHED */
234 			error = EDEADLK;
235 			break;
236 		} else {
237 			error = EDEADLK;
238 			break;
239 		}
240 	}
241 	return (error);
242 }
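
/*
 * Illustrative sketch only (not part of the original source): a caller
 * already holding a shared lock can attempt an in-place upgrade and, if
 * hammer_lock_upgrade() reports EDEADLK because other shared holders
 * exist, fall back to dropping the shared lock and re-acquiring it
 * exclusively.  The function name and ident string are hypothetical.
 */
#if 0
static void
example_upgrade_or_relock(struct hammer_lock *lock)
{
	if (hammer_lock_upgrade(lock) == EDEADLK) {
		/*
		 * Other shared holders prevented the upgrade.  Release
		 * our shared count and take the lock exclusively from
		 * scratch (the state may have changed in between).
		 */
		hammer_unlock(lock);
		hammer_lock_ex_ident(lock, "hmrupg");
	}
	/* The lock is now held exclusively. */
}
#endif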
243 
244 /*
245  * Downgrade an exclusively held lock to a shared lock.
246  */
247 void
248 hammer_lock_downgrade(struct hammer_lock *lock)
249 {
250 	thread_t td __debugvar = curthread;
251 	u_int lv;
252 	u_int nlv;
253 
254 	KKASSERT((lock->lockval & ~HAMMER_LOCKF_WANTED) ==
255 		 (HAMMER_LOCKF_EXCLUSIVE | 1));
256 	KKASSERT(lock->lowner == td);
257 
258 	/*
259 	 * NOTE: Must clear owner before releasing exclusivity
260 	 */
261 	lock->lowner = NULL;
262 
263 	for (;;) {
264 		lv = lock->lockval;
265 		nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
266 		if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
267 			if (lv & HAMMER_LOCKF_WANTED)
268 				wakeup(&lock->lockval);
269 			break;
270 		}
271 	}
272 }
273 
274 void
275 hammer_unlock(struct hammer_lock *lock)
276 {
277 	thread_t td __debugvar = curthread;
278 	u_int lv;
279 	u_int nlv;
280 
281 	lv = lock->lockval;
282 	KKASSERT(lv != 0);
283 	if (lv & HAMMER_LOCKF_EXCLUSIVE)
284 		KKASSERT(lock->lowner == td);
285 
286 	for (;;) {
287 		lv = lock->lockval;
288 		nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
289 		if (nlv > 1) {
290 			nlv = lv - 1;
291 			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
292 				break;
293 		} else if (nlv == 1) {
294 			nlv = 0;
295 			if (lv & HAMMER_LOCKF_EXCLUSIVE)
296 				lock->lowner = NULL;
297 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
298 				if (lv & HAMMER_LOCKF_WANTED)
299 					wakeup(&lock->lockval);
300 				break;
301 			}
302 		} else {
303 			panic("hammer_unlock: lock %p is not held", lock);
304 		}
305 	}
306 }
307 
308 /*
309  * The calling thread must be holding a shared or exclusive lock.
310  * Returns < 0 if the lock is held shared, and > 0 if it is held exclusively.
311  */
312 int
313 hammer_lock_status(struct hammer_lock *lock)
314 {
315 	u_int lv = lock->lockval;
316 
317 	if (lv & HAMMER_LOCKF_EXCLUSIVE)
318 		return(1);
319 	else if (lv)
320 		return(-1);
321 	panic("hammer_lock_status: lock must be held: %p", lock);
322 }
323 
324 /*
325  * Bump the ref count for a lock (not the excl/share count, but a separate
326  * structural reference count).  The CHECK flag will be set on a 0->1
327  * transition.
328  *
329  * This function does nothing to serialize races between multiple threads.
330  * The caller can interlock it later on to deal with serialization.
331  *
332  * MPSAFE
333  */
334 void
335 hammer_ref(struct hammer_lock *lock)
336 {
337 	u_int lv;
338 	u_int nlv;
339 
340 	for (;;) {
341 		lv = lock->refs;
342 		if ((lv & ~HAMMER_REFS_FLAGS) == 0) {
343 			nlv = (lv + 1) | HAMMER_REFS_CHECK;
344 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
345 				return;
346 		} else {
347 			nlv = (lv + 1);
348 			KKASSERT((int)nlv > 0);
349 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
350 				return;
351 		}
352 	}
353 	/* not reached */
354 }
355 
356 /*
357  * Drop the ref count for a lock (not the excl/share count, but a separate
358  * structural reference count).  The CHECK flag will be cleared on a 1->0
359  * transition.
360  *
361  * This function does nothing to serialize races between multiple threads.
362  *
363  * MPSAFE
364  */
365 void
366 hammer_rel(struct hammer_lock *lock)
367 {
368 	u_int lv;
369 	u_int nlv;
370 
371 	for (;;) {
372 		lv = lock->refs;
373 		if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
374 			nlv = (lv - 1) & ~HAMMER_REFS_CHECK;
375 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
376 				return;
377 		} else {
378 			KKASSERT((int)lv > 0);
379 			nlv = (lv - 1);
380 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
381 				return;
382 		}
383 	}
384 	/* not reached */
385 }
386 
387 /*
388  * The hammer_*_interlock() and hammer_*_interlock_done() functions are
389  * more sophisticated versions which handle MP transition races and block
390  * when necessary.
391  *
392  * hammer_ref_interlock() bumps the ref-count and conditionally acquires
393  * the interlock for 0->1 transitions or if the CHECK is found to be set.
394  *
395  * This case will return TRUE, the interlock will be held, and the CHECK
396  * bit also set.  Other threads attempting to ref will see the CHECK bit
397  * and block until we clean up.
398  *
399  * FALSE is returned for transitions other than 0->1 when the CHECK bit
400  * is not found to be set, or if the function loses the race with another
401  * thread.
402  *
403  * TRUE is only returned to one thread and the others will block.
404  * Effectively a TRUE indicator means 'someone transitioned 0->1
405  * and you are the first thread to successfully lock it after that, so you
406  * need to check'.  Due to races the ref-count may be greater than 1 upon
407  * return.
408  *
409  * MPSAFE
410  */
411 int
412 hammer_ref_interlock(struct hammer_lock *lock)
413 {
414 	u_int lv;
415 	u_int nlv;
416 
417 	/*
418 	 * Integrated reference count bump, lock, and check, with hot-path.
419 	 *
420 	 * (a) Return 1	(+LOCKED, +CHECK)	0->1 transition
421 	 * (b) Return 0 (-LOCKED, -CHECK)	N->N+1 transition
422 	 * (c) Break out (+CHECK)		Check condition and Cannot lock
423 	 * (d) Return 1 (+LOCKED, +CHECK)	Successfully locked
424 	 */
425 	for (;;) {
426 		lv = lock->refs;
427 		if (lv == 0) {
428 			nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
429 			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
430 				lock->rowner = curthread;
431 				return(1);
432 			}
433 		} else {
434 			nlv = (lv + 1);
435 			if ((lv & ~HAMMER_REFS_FLAGS) == 0)
436 				nlv |= HAMMER_REFS_CHECK;
437 			if ((nlv & HAMMER_REFS_CHECK) == 0) {
438 				if (atomic_cmpset_int(&lock->refs, lv, nlv))
439 					return(0);
440 			} else if (lv & HAMMER_REFS_LOCKED) {
441 				/* CHECK also set here */
442 				if (atomic_cmpset_int(&lock->refs, lv, nlv))
443 					break;
444 			} else {
445 				/* CHECK also set here */
446 				nlv |= HAMMER_REFS_LOCKED;
447 				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
448 					lock->rowner = curthread;
449 					return(1);
450 				}
451 			}
452 		}
453 	}
454 
455 	/*
456 	 * Deferred check condition because we were unable to acquire the
457 	 * lock.  We must block until the check condition is cleared due
458 	 * to a race with another thread, or we are able to acquire the
459 	 * lock.
460 	 *
461 	 * (a) Return 0	(-CHECK)		Another thread handled it
462 	 * (b) Return 1 (+LOCKED, +CHECK)	We handled it.
463 	 */
464 	for (;;) {
465 		lv = lock->refs;
466 		if ((lv & HAMMER_REFS_CHECK) == 0)
467 			return(0);
468 		if (lv & HAMMER_REFS_LOCKED) {
469 			tsleep_interlock(&lock->refs, 0);
470 			nlv = (lv | HAMMER_REFS_WANTED);
471 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
472 				tsleep(&lock->refs, PINTERLOCKED, "h1lk", 0);
473 		} else {
474 			/* CHECK also set here */
475 			nlv = lv | HAMMER_REFS_LOCKED;
476 			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
477 				lock->rowner = curthread;
478 				return(1);
479 			}
480 		}
481 	}
482 	/* not reached */
483 }
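
/*
 * Illustrative sketch only (not part of the original source): the typical
 * load-path pattern for hammer_ref_interlock().  A TRUE return hands this
 * thread the interlock with CHECK set; it performs the one-time setup or
 * validation and then calls hammer_ref_interlock_done().  The function
 * name and the setup step are hypothetical.
 */
#if 0
static void
example_ref_and_load(struct hammer_lock *lock)
{
	if (hammer_ref_interlock(lock)) {
		/*
		 * We own the interlock and CHECK is set.  Load or
		 * revalidate the structure here, then release the
		 * interlock and clear CHECK so blocked threads can
		 * proceed.
		 */
		hammer_ref_interlock_done(lock);
	}
	/* A FALSE return means no check was required of this thread. */
}
#endif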
484 
485 /*
486  * This is the same as hammer_ref_interlock() but asserts that the
487  * 0->1 transition is always true, thus the lock must have no references
488  * on entry or have CHECK set, and will have one reference with the
489  * interlock held on return.  It must also not be interlocked on entry
490  * by anyone.
491  *
492  * NOTE that CHECK will never be found set when the ref-count is 0.
493  *
494  * TRUE is always returned to match the API for hammer_ref_interlock().
495  * This function returns with one ref, the lock held, and the CHECK bit set.
496  */
497 int
498 hammer_ref_interlock_true(struct hammer_lock *lock)
499 {
500 	u_int lv;
501 	u_int nlv;
502 
503 	for (;;) {
504 		lv = lock->refs;
505 
506 		if (lv) {
507 			panic("hammer_ref_interlock_true: bad lock %p %08x\n",
508 			      lock, lock->refs);
509 		}
510 		nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
511 		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
512 			lock->rowner = curthread;
513 			return (1);
514 		}
515 	}
516 }
517 
518 /*
519  * Unlock the interlock acquired by hammer_ref_interlock() and clear the
520  * CHECK flag.  The ref-count remains unchanged.
521  *
522  * This routine is called in the load path when the load succeeds.
523  */
524 void
525 hammer_ref_interlock_done(struct hammer_lock *lock)
526 {
527 	u_int lv;
528 	u_int nlv;
529 
530 	for (;;) {
531 		lv = lock->refs;
532 		nlv = lv & ~HAMMER_REFS_FLAGS;
533 		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
534 			if (lv & HAMMER_REFS_WANTED)
535 				wakeup(&lock->refs);
536 			break;
537 		}
538 	}
539 }
540 
541 /*
542  * hammer_rel_interlock() works a bit differently in that it must
543  * acquire the lock in tandem with a 1->0 transition.  CHECK is
544  * not used.
545  *
546  * TRUE is returned on 1->0 transitions with the lock held on return
547  * and FALSE is returned otherwise with the lock not held.
548  *
549  * It is important to note that the refs are not stable and may
550  * increase while we hold the lock, the TRUE indication only means
551  * that we transitioned 1->0, not necessarily that we stayed at 0.
552  *
553  * Another thread bumping refs while we hold the lock will set CHECK,
554  * causing one of the competing hammer_ref_interlock() calls to
555  * return TRUE after we release our lock.
556  *
557  * MPSAFE
558  */
559 int
560 hammer_rel_interlock(struct hammer_lock *lock, int locked)
561 {
562 	u_int lv;
563 	u_int nlv;
564 
565 	/*
566 	 * In locked mode (failure/unload path) we release the
567 	 * ref-count but leave it locked.
568 	 */
569 	if (locked) {
570 		hammer_rel(lock);
571 		return(1);
572 	}
573 
574 	/*
575 	 * Integrated reference count drop with LOCKED, plus the hot-path
576 	 * returns.
577 	 */
578 	for (;;) {
579 		lv = lock->refs;
580 
581 		if (lv == 1) {
582 			nlv = 0 | HAMMER_REFS_LOCKED;
583 			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
584 				lock->rowner = curthread;
585 				return(1);
586 			}
587 		} else if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
588 			if ((lv & HAMMER_REFS_LOCKED) == 0) {
589 				nlv = (lv - 1) | HAMMER_REFS_LOCKED;
590 				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
591 					lock->rowner = curthread;
592 					return(1);
593 				}
594 			} else {
595 				nlv = lv | HAMMER_REFS_WANTED;
596 				tsleep_interlock(&lock->refs, 0);
597 				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
598 					tsleep(&lock->refs, PINTERLOCKED,
599 					       "h0lk", 0);
600 				}
601 			}
602 		} else {
603 			nlv = (lv - 1);
604 			KKASSERT((int)nlv >= 0);
605 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
606 				return(0);
607 		}
608 	}
609 	/* not reached */
610 }
611 
612 /*
613  * Unlock the interlock acquired by hammer_rel_interlock().
614  *
615  * If orig_locked is non-zero the interlock was originally held prior to
616  * the hammer_rel_interlock() call and passed through to us.  In this
617  * case we want to retain the CHECK error state if not transitioning
618  * to 0.
619  *
620  * The code is the same either way so we do not have to conditionalize
621  * on orig_locked.
622  */
623 void
624 hammer_rel_interlock_done(struct hammer_lock *lock, int orig_locked __unused)
625 {
626 	u_int lv;
627 	u_int nlv;
628 
629 	for (;;) {
630 		lv = lock->refs;
631 		nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);
632 		if ((lv & ~HAMMER_REFS_FLAGS) == 0)
633 			nlv &= ~HAMMER_REFS_CHECK;
634 		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
635 			if (lv & HAMMER_REFS_WANTED)
636 				wakeup(&lock->refs);
637 			break;
638 		}
639 	}
640 }
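
/*
 * Illustrative sketch only (not part of the original source): the typical
 * teardown pattern pairing hammer_rel_interlock() with
 * hammer_rel_interlock_done().  A TRUE return means we owned the 1->0
 * transition and hold the interlock; racing threads that re-ref in the
 * meantime will observe CHECK and revalidate.  The function name and the
 * teardown step are hypothetical.
 */
#if 0
static void
example_rel_and_unload(struct hammer_lock *lock)
{
	if (hammer_rel_interlock(lock, 0)) {
		/*
		 * 1->0 transition with the interlock held.  Disassociate
		 * any backing objects here, then release the interlock.
		 */
		hammer_rel_interlock_done(lock, 0);
	}
}
#endif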
641 
642 /*
643  * Acquire the interlock on lock->refs.
644  *
645  * Return TRUE if CHECK is currently set.  Note that CHECK will not
646  * be set if the reference count is 0, but can get set if this function
647  * is preceded by, say, hammer_ref(), or through races with other
648  * threads.  The return value allows the caller to use the same logic
649  * as hammer_ref_interlock().
650  *
651  * MPSAFE
652  */
653 int
654 hammer_get_interlock(struct hammer_lock *lock)
655 {
656 	u_int lv;
657 	u_int nlv;
658 
659 	for (;;) {
660 		lv = lock->refs;
661 		if (lv & HAMMER_REFS_LOCKED) {
662 			nlv = lv | HAMMER_REFS_WANTED;
663 			tsleep_interlock(&lock->refs, 0);
664 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
665 				tsleep(&lock->refs, PINTERLOCKED, "hilk", 0);
666 		} else {
667 			nlv = (lv | HAMMER_REFS_LOCKED);
668 			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
669 				lock->rowner = curthread;
670 				return((lv & HAMMER_REFS_CHECK) ? 1 : 0);
671 			}
672 		}
673 	}
674 }
675 
676 /*
677  * Attempt to acquire the interlock and expect 0 refs.  Used by the buffer
678  * cache callback code to disassociate or lock the bufs related to HAMMER
679  * structures.
680  *
681  * During teardown the related bp will be acquired by hammer_io_release()
682  * which interlocks our test.
683  *
684  * Returns non-zero on success, zero on failure.
685  */
686 int
687 hammer_try_interlock_norefs(struct hammer_lock *lock)
688 {
689 	u_int lv;
690 	u_int nlv;
691 
692 	for (;;) {
693 		lv = lock->refs;
694 		if (lv == 0) {
695 			nlv = lv | HAMMER_REFS_LOCKED;
696 			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
697 				lock->rowner = curthread;
698 				return(1);
699 			}
700 		} else {
701 			return(0);
702 		}
703 	}
704 	/* not reached */
705 }
706 
707 /*
708  * Release the interlock on lock->refs.  This function will set
709  * CHECK if the refs is non-zero and error is non-zero, and clear
710  * CHECK otherwise.
711  *
712  * MPSAFE
713  */
714 void
715 hammer_put_interlock(struct hammer_lock *lock, int error)
716 {
717 	u_int lv;
718 	u_int nlv;
719 
720 	for (;;) {
721 		lv = lock->refs;
722 		KKASSERT(lv & HAMMER_REFS_LOCKED);
723 		nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);
724 
725 		if ((nlv & ~HAMMER_REFS_FLAGS) == 0 || error == 0)
726 			nlv &= ~HAMMER_REFS_CHECK;
727 		else
728 			nlv |= HAMMER_REFS_CHECK;
729 
730 		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
731 			if (lv & HAMMER_REFS_WANTED)
732 				wakeup(&lock->refs);
733 			return;
734 		}
735 	}
736 }
737 
738 /*
739  * The sync_lock must be held when doing any modifying operations on
740  * meta-data.  It does not have to be held when modifying non-meta-data buffers
741  * (backend or frontend).
742  *
743  * The flusher holds the lock exclusively while all other consumers hold it
744  * shared.  All modifying operations made while holding the lock are atomic
745  * in that they will be made part of the same flush group.
746  *
747  * Due to the atomicity requirement, deadlock recovery code CANNOT release the
748  * sync lock, nor can we give pending exclusive sync locks priority over
749  * a shared sync lock as this could lead to a 3-way deadlock.
750  */
751 void
752 hammer_sync_lock_ex(hammer_transaction_t trans)
753 {
754 	++trans->sync_lock_refs;
755 	hammer_lock_ex(&trans->hmp->sync_lock);
756 }
757 
758 void
759 hammer_sync_lock_sh(hammer_transaction_t trans)
760 {
761 	++trans->sync_lock_refs;
762 	hammer_lock_sh(&trans->hmp->sync_lock);
763 }
764 
765 int
766 hammer_sync_lock_sh_try(hammer_transaction_t trans)
767 {
768 	int error;
769 
770 	++trans->sync_lock_refs;
771 	if ((error = hammer_lock_sh_try(&trans->hmp->sync_lock)) != 0)
772 		--trans->sync_lock_refs;
773 	return (error);
774 }
775 
776 void
777 hammer_sync_unlock(hammer_transaction_t trans)
778 {
779 	--trans->sync_lock_refs;
780 	hammer_unlock(&trans->hmp->sync_lock);
781 }
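
/*
 * Illustrative sketch only (not part of the original source): consumers
 * bracket meta-data modifications with a shared sync lock so the work
 * becomes part of the current flush group, while the flusher takes the
 * lock exclusively.  The function name is hypothetical.
 */
#if 0
static void
example_sync_locked_op(hammer_transaction_t trans)
{
	hammer_sync_lock_sh(trans);
	/* ... modify meta-data as part of the current flush group ... */
	hammer_sync_unlock(trans);
}
#endif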
782 
783 /*
784  * Misc
785  */
786 u_int32_t
787 hammer_to_unix_xid(uuid_t *uuid)
788 {
789 	return(*(u_int32_t *)&uuid->node[2]);
790 }
791 
792 void
793 hammer_guid_to_uuid(uuid_t *uuid, u_int32_t guid)
794 {
795 	bzero(uuid, sizeof(*uuid));
796 	*(u_int32_t *)&uuid->node[2] = guid;
797 }
798 
799 void
800 hammer_time_to_timespec(u_int64_t xtime, struct timespec *ts)
801 {
802 	ts->tv_sec = (unsigned long)(xtime / 1000000);
803 	ts->tv_nsec = (unsigned int)(xtime % 1000000) * 1000L;
804 }
805 
806 u_int64_t
807 hammer_timespec_to_time(struct timespec *ts)
808 {
809 	u_int64_t xtime;
810 
811 	xtime = (unsigned)(ts->tv_nsec / 1000) +
812 		(unsigned long)ts->tv_sec * 1000000ULL;
813 	return(xtime);
814 }
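
/*
 * Worked example, illustrative only: HAMMER timestamps are microseconds.
 * An xtime of 1234567890123456 maps to tv_sec = 1234567890 and
 * tv_nsec = 123456000, and converts back to the same xtime (the
 * conversions preserve microsecond, not nanosecond, resolution).
 */
#if 0
static void
example_time_roundtrip(void)
{
	struct timespec ts;
	u_int64_t xtime = 1234567890123456ULL;

	hammer_time_to_timespec(xtime, &ts);
	KKASSERT(ts.tv_sec == 1234567890 && ts.tv_nsec == 123456000);
	KKASSERT(hammer_timespec_to_time(&ts) == xtime);
}
#endif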
815 
816 
817 /*
818  * Convert a HAMMER filesystem object type to a vnode type
819  */
820 enum vtype
821 hammer_get_vnode_type(u_int8_t obj_type)
822 {
823 	switch(obj_type) {
824 	case HAMMER_OBJTYPE_DIRECTORY:
825 		return(VDIR);
826 	case HAMMER_OBJTYPE_REGFILE:
827 		return(VREG);
828 	case HAMMER_OBJTYPE_DBFILE:
829 		return(VDATABASE);
830 	case HAMMER_OBJTYPE_FIFO:
831 		return(VFIFO);
832 	case HAMMER_OBJTYPE_SOCKET:
833 		return(VSOCK);
834 	case HAMMER_OBJTYPE_CDEV:
835 		return(VCHR);
836 	case HAMMER_OBJTYPE_BDEV:
837 		return(VBLK);
838 	case HAMMER_OBJTYPE_SOFTLINK:
839 		return(VLNK);
840 	default:
841 		return(VBAD);
842 	}
843 	/* not reached */
844 }
845 
846 int
847 hammer_get_dtype(u_int8_t obj_type)
848 {
849 	switch(obj_type) {
850 	case HAMMER_OBJTYPE_DIRECTORY:
851 		return(DT_DIR);
852 	case HAMMER_OBJTYPE_REGFILE:
853 		return(DT_REG);
854 	case HAMMER_OBJTYPE_DBFILE:
855 		return(DT_DBF);
856 	case HAMMER_OBJTYPE_FIFO:
857 		return(DT_FIFO);
858 	case HAMMER_OBJTYPE_SOCKET:
859 		return(DT_SOCK);
860 	case HAMMER_OBJTYPE_CDEV:
861 		return(DT_CHR);
862 	case HAMMER_OBJTYPE_BDEV:
863 		return(DT_BLK);
864 	case HAMMER_OBJTYPE_SOFTLINK:
865 		return(DT_LNK);
866 	default:
867 		return(DT_UNKNOWN);
868 	}
869 	/* not reached */
870 }
871 
872 u_int8_t
873 hammer_get_obj_type(enum vtype vtype)
874 {
875 	switch(vtype) {
876 	case VDIR:
877 		return(HAMMER_OBJTYPE_DIRECTORY);
878 	case VREG:
879 		return(HAMMER_OBJTYPE_REGFILE);
880 	case VDATABASE:
881 		return(HAMMER_OBJTYPE_DBFILE);
882 	case VFIFO:
883 		return(HAMMER_OBJTYPE_FIFO);
884 	case VSOCK:
885 		return(HAMMER_OBJTYPE_SOCKET);
886 	case VCHR:
887 		return(HAMMER_OBJTYPE_CDEV);
888 	case VBLK:
889 		return(HAMMER_OBJTYPE_BDEV);
890 	case VLNK:
891 		return(HAMMER_OBJTYPE_SOFTLINK);
892 	default:
893 		return(HAMMER_OBJTYPE_UNKNOWN);
894 	}
895 	/* not reached */
896 }
897 
898 /*
899  * Return flags for hammer_delete_at_cursor()
900  */
901 int
902 hammer_nohistory(hammer_inode_t ip)
903 {
904 	if (ip->hmp->hflags & HMNT_NOHISTORY)
905 		return(HAMMER_DELETE_DESTROY);
906 	if (ip->ino_data.uflags & (SF_NOHISTORY|UF_NOHISTORY))
907 		return(HAMMER_DELETE_DESTROY);
908 	return(0);
909 }
910 
911 /*
912  * ALGORITHM VERSION 1:
913  *	Return a namekey hash.   The 64 bit namekey hash consists of a 32 bit
914  *	crc in the MSB and 0 in the LSB.  The caller will use the low 32 bits
915  *	to generate a unique key and will scan all entries with the same upper
916  *	32 bits when issuing a lookup.
917  *
918  *	0hhhhhhhhhhhhhhh hhhhhhhhhhhhhhhh 0000000000000000 0000000000000000
919  *
920  * ALGORITHM VERSION 2:
921  *
922  *	The 64 bit hash key is generated from the following components.  The
923  *	first three characters are encoded as 5-bit quantities, the middle
924  *	N characters are hashed into a 6 bit quantity, and the last two
925  *	characters are encoded as 5-bit quantities.  A 32 bit hash of the
926  *	entire filename is encoded in the low 32 bits.  Bit 0 is set to
927  *	0 to guarantee us a 2^24 bit iteration space.
928  *
929  *	0aaaaabbbbbccccc mmmmmmyyyyyzzzzz hhhhhhhhhhhhhhhh hhhhhhhhhhhhhhh0
930  *
931  *	This gives us a domain sort for the first three characters, the last
932  *	two characters, and breaks the middle space into 64 random domains.
933  *	The domain sort folds upper case, lower case, digits, and punctuation
934  *	spaces together, the idea being that filenames tend not to mix
935  *	those domains.
936  *
937  *	The 64 random domains act as a sub-sort for the middle characters
938  *	but may cause a random seek.  If the filesystem is being accessed
939  *	in sorted order we should tend to get very good linearity for most
940  *	filenames and devolve into more random seeks otherwise.
941  *
942  * We strip bit 63 in order to provide a positive key, this way a seek
943  * offset of 0 will represent the base of the directory.
944  *
945  * This function can never return 0.  We use the MSB-0 space to synthesize
946  * artificial directory entries such as "." and "..".
947  */
948 int64_t
949 hammer_directory_namekey(hammer_inode_t dip, const void *name, int len,
950 			 u_int32_t *max_iterationsp)
951 {
952 	int64_t key;
953 	int32_t crcx;
954 	const char *aname = name;
955 
956 	switch (dip->ino_data.cap_flags & HAMMER_INODE_CAP_DIRHASH_MASK) {
957 	case HAMMER_INODE_CAP_DIRHASH_ALG0:
958 		key = (int64_t)(crc32(aname, len) & 0x7FFFFFFF) << 32;
959 		if (key == 0)
960 			key |= 0x100000000LL;
961 		*max_iterationsp = 0xFFFFFFFFU;
962 		break;
963 	case HAMMER_INODE_CAP_DIRHASH_ALG1:
964 		key = (u_int32_t)crc32(aname, len) & 0xFFFFFFFEU;
965 
966 		switch(len) {
967 		default:
968 			crcx = crc32(aname + 3, len - 5);
969 			crcx = crcx ^ (crcx >> 6) ^ (crcx >> 12);
970 			key |=  (int64_t)(crcx & 0x3F) << 42;
971 			/* fall through */
972 		case 5:
973 		case 4:
974 			/* fall through */
975 		case 3:
976 			key |= ((int64_t)(aname[2] & 0x1F) << 48);
977 			/* fall through */
978 		case 2:
979 			key |= ((int64_t)(aname[1] & 0x1F) << 53) |
980 			       ((int64_t)(aname[len-2] & 0x1F) << 37);
981 			/* fall through */
982 		case 1:
983 			key |= ((int64_t)(aname[0] & 0x1F) << 58) |
984 			       ((int64_t)(aname[len-1] & 0x1F) << 32);
985 			/* fall through */
986 		case 0:
987 			break;
988 		}
989 		if ((key & 0xFFFFFFFF00000000LL) == 0)
990 			key |= 0x100000000LL;
991 		if (hammer_debug_general & 0x0400) {
992 			kprintf("namekey2: 0x%016llx %*.*s\n",
993 				(long long)key, len, len, aname);
994 		}
995 		*max_iterationsp = 0x00FFFFFF;
996 		break;
997 	case HAMMER_INODE_CAP_DIRHASH_ALG2:
998 	case HAMMER_INODE_CAP_DIRHASH_ALG3:
999 	default:
1000 		key = 0;			/* compiler warning */
1001 		*max_iterationsp = 1;		/* sanity */
1002 		panic("hammer_directory_namekey: bad algorithm %p\n", dip);
1003 		break;
1004 	}
1005 	return(key);
1006 }
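
/*
 * Worked example, illustrative only, for the version-2 hash
 * (HAMMER_INODE_CAP_DIRHASH_ALG1): for the name "abcdef" (len 6) the key
 * is assembled as
 *
 *	bits 58-62  'a' & 0x1F		bits 37-41  'e' & 0x1F
 *	bits 53-57  'b' & 0x1F		bits 32-36  'f' & 0x1F
 *	bits 48-52  'c' & 0x1F		bits 42-47  crc32 of the middle
 *					            ("d"), folded and masked
 *					            to 6 bits
 *	bits  0-31  crc32("abcdef") & 0xFFFFFFFE (bit 0 forced to 0)
 *
 * which matches the 0aaaaabbbbbccccc mmmmmmyyyyyzzzzz h...h0 layout
 * described above (the crc values themselves are left symbolic here).
 */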
1007 
1008 /*
1009  * Convert string after @@ (@@ not included) to TID.  Returns 0 on success,
1010  * EINVAL on failure.
1011  *
1012  * If this function fails, *ispfsp, *tidp, and *localizationp will not
1013  * be modified.
1014  */
1015 int
1016 hammer_str_to_tid(const char *str, int *ispfsp,
1017 		  hammer_tid_t *tidp, u_int32_t *localizationp)
1018 {
1019 	hammer_tid_t tid;
1020 	u_int32_t localization;
1021 	char *ptr;
1022 	int ispfs;
1023 	int n;
1024 
1025 	/*
1026 	 * Forms allowed for TID:  "0x%016llx"
1027 	 *			   "-1"
1028 	 */
1029 	tid = strtouq(str, &ptr, 0);
1030 	n = ptr - str;
1031 	if (n == 2 && str[0] == '-' && str[1] == '1') {
1032 		/* ok */
1033 	} else if (n == 18 && str[0] == '0' && (str[1] | 0x20) == 'x') {
1034 		/* ok */
1035 	} else {
1036 		return(EINVAL);
1037 	}
1038 
1039 	/*
1040 	 * Forms allowed for PFS:  ":%05d"  (i.e. "...:0" would be illegal).
1041 	 */
1042 	str = ptr;
1043 	if (*str == ':') {
1044 		localization = strtoul(str + 1, &ptr, 10) << 16;
1045 		if (ptr - str != 6)
1046 			return(EINVAL);
1047 		str = ptr;
1048 		ispfs = 1;
1049 	} else {
1050 		localization = *localizationp;
1051 		ispfs = 0;
1052 	}
1053 
1054 	/*
1055 	 * Any trailing junk invalidates special extension handling.
1056 	 */
1057 	if (*str)
1058 		return(EINVAL);
1059 	*tidp = tid;
1060 	*localizationp = localization;
1061 	*ispfsp = ispfs;
1062 	return(0);
1063 }
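
/*
 * Illustrative examples only (hypothetical caller and string values):
 * the accepted forms are a 16-digit "0x..." TID or "-1", optionally
 * followed by a 5-digit ":%05d" PFS field.  Anything else, including
 * trailing junk, yields EINVAL.
 */
#if 0
static void
example_str_to_tid(void)
{
	hammer_tid_t tid;
	u_int32_t localization = 0;
	int ispfs;

	/* Explicit TID, no PFS field; *localizationp is left as passed in */
	hammer_str_to_tid("0x00000001061a8ba0", &ispfs, &tid, &localization);

	/* Explicit TID with PFS #1 */
	hammer_str_to_tid("0x00000001061a8ba0:00001",
			  &ispfs, &tid, &localization);

	/* "-1" parses to the maximum TID value, (hammer_tid_t)-1 */
	hammer_str_to_tid("-1", &ispfs, &tid, &localization);
}
#endif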
1064 
1065 void
1066 hammer_crc_set_blockmap(hammer_blockmap_t blockmap)
1067 {
1068 	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
1069 }
1070 
1071 void
1072 hammer_crc_set_volume(hammer_volume_ondisk_t ondisk)
1073 {
1074 	ondisk->vol_crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
1075 			  crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
1076 }
1077 
1078 int
1079 hammer_crc_test_blockmap(hammer_blockmap_t blockmap)
1080 {
1081 	hammer_crc_t crc;
1082 
1083 	crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
1084 	return (blockmap->entry_crc == crc);
1085 }
1086 
1087 int
1088 hammer_crc_test_volume(hammer_volume_ondisk_t ondisk)
1089 {
1090 	hammer_crc_t crc;
1091 
1092 	crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
1093 	      crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
1094 	return (ondisk->vol_crc == crc);
1095 }
1096 
1097 int
1098 hammer_crc_test_btree(hammer_node_ondisk_t ondisk)
1099 {
1100 	hammer_crc_t crc;
1101 
1102 	crc = crc32(&ondisk->crc + 1, HAMMER_BTREE_CRCSIZE);
1103 	return (ondisk->crc == crc);
1104 }
1105 
1106 /*
1107  * Test or set the leaf->data_crc field.  Deal with any special cases given
1108  * a generic B-Tree leaf element and its data.
1109  *
1110  * NOTE: Inode-data: the atime and mtime fields are not CRCd, allowing them
1111  *       to be updated in-place.
1112  */
1113 int
1114 hammer_crc_test_leaf(void *data, hammer_btree_leaf_elm_t leaf)
1115 {
1116 	hammer_crc_t crc;
1117 
1118 	if (leaf->data_len == 0) {
1119 		crc = 0;
1120 	} else {
1121 		switch(leaf->base.rec_type) {
1122 		case HAMMER_RECTYPE_INODE:
1123 			if (leaf->data_len != sizeof(struct hammer_inode_data))
1124 				return(0);
1125 			crc = crc32(data, HAMMER_INODE_CRCSIZE);
1126 			break;
1127 		default:
1128 			crc = crc32(data, leaf->data_len);
1129 			break;
1130 		}
1131 	}
1132 	return (leaf->data_crc == crc);
1133 }
1134 
1135 void
1136 hammer_crc_set_leaf(void *data, hammer_btree_leaf_elm_t leaf)
1137 {
1138 	if (leaf->data_len == 0) {
1139 		leaf->data_crc = 0;
1140 	} else {
1141 		switch(leaf->base.rec_type) {
1142 		case HAMMER_RECTYPE_INODE:
1143 			KKASSERT(leaf->data_len ==
1144 				  sizeof(struct hammer_inode_data));
1145 			leaf->data_crc = crc32(data, HAMMER_INODE_CRCSIZE);
1146 			break;
1147 		default:
1148 			leaf->data_crc = crc32(data, leaf->data_len);
1149 			break;
1150 		}
1151 	}
1152 }
1153 
1154 void
1155 hkprintf(const char *ctl, ...)
1156 {
1157 	__va_list va;
1158 
1159 	if (hammer_debug_debug) {
1160 		__va_start(va, ctl);
1161 		kvprintf(ctl, va);
1162 		__va_end(va);
1163 	}
1164 }
1165 
1166 /*
1167  * Return the block size at the specified file offset.
1168  */
1169 int
1170 hammer_blocksize(int64_t file_offset)
1171 {
1172 	if (file_offset < HAMMER_XDEMARC)
1173 		return(HAMMER_BUFSIZE);
1174 	else
1175 		return(HAMMER_XBUFSIZE);
1176 }
1177 
1178 int
1179 hammer_blockoff(int64_t file_offset)
1180 {
1181 	if (file_offset < HAMMER_XDEMARC)
1182 		return((int)file_offset & HAMMER_BUFMASK);
1183 	else
1184 		return((int)file_offset & HAMMER_XBUFMASK);
1185 }
1186 
1187 /*
1188  * Return the demarkation point between the two offsets where
1189  * the block size changes.
1190  */
1191 int64_t
1192 hammer_blockdemarc(int64_t file_offset1, int64_t file_offset2)
1193 {
1194 	if (file_offset1 < HAMMER_XDEMARC) {
1195 		if (file_offset2 <= HAMMER_XDEMARC)
1196 			return(file_offset2);
1197 		return(HAMMER_XDEMARC);
1198 	}
1199 	panic("hammer_blockdemarc: illegal range %lld %lld\n",
1200 	      (long long)file_offset1, (long long)file_offset2);
1201 }
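
/*
 * Minimal sketch, illustrative only: below HAMMER_XDEMARC file data is
 * addressed in small HAMMER_BUFSIZE blocks, at or above it in large
 * HAMMER_XBUFSIZE blocks, which is exactly what hammer_blocksize()
 * reports.  The function name is hypothetical.
 */
#if 0
static void
example_blocksize(void)
{
	KKASSERT(hammer_blocksize(0) == HAMMER_BUFSIZE);
	KKASSERT(hammer_blocksize(HAMMER_XDEMARC - 1) == HAMMER_BUFSIZE);
	KKASSERT(hammer_blocksize(HAMMER_XDEMARC) == HAMMER_XBUFSIZE);
}
#endif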
1202 
1203 udev_t
1204 hammer_fsid_to_udev(uuid_t *uuid)
1205 {
1206 	u_int32_t crc;
1207 
1208 	crc = crc32(uuid, sizeof(*uuid));
1209 	return((udev_t)crc);
1210 }
1211 
1212