xref: /dragonfly/sys/vfs/hammer/hammer_subs.c (revision 0ca59c34)
1 /*
2  * Copyright (c) 2007-2011 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 /*
35  * HAMMER structural locking
36  */
37 
38 #include "hammer.h"
39 
40 void
41 hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident)
42 {
43 	thread_t td = curthread;
44 	u_int lv;
45 	u_int nlv;
46 
47 	KKASSERT(lock->refs);
48 	for (;;) {
49 		lv = lock->lockval;
50 
51 		if (lv == 0) {
52 			nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
53 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
54 				lock->lowner = td;
55 				break;
56 			}
57 		} else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
58 			   lock->lowner == td) {
59 			nlv = (lv + 1);
60 			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
61 				break;
62 		} else {
63 			if (hammer_debug_locks) {
64 				hdkprintf("held by %p\n", lock->lowner);
65 			}
66 			nlv = lv | HAMMER_LOCKF_WANTED;
67 			++hammer_contention_count;
68 			tsleep_interlock(&lock->lockval, 0);
69 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
70 				tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
71 				if (hammer_debug_locks)
72 					hdkprintf("try again\n");
73 			}
74 		}
75 	}
76 }
77 
78 /*
79  * Try to obtain an exclusive lock
80  */
81 int
82 hammer_lock_ex_try(struct hammer_lock *lock)
83 {
84 	thread_t td = curthread;
85 	int error;
86 	u_int lv;
87 	u_int nlv;
88 
89 	KKASSERT(lock->refs);
90 	for (;;) {
91 		lv = lock->lockval;
92 
93 		if (lv == 0) {
94 			nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
95 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
96 				lock->lowner = td;
97 				error = 0;
98 				break;
99 			}
100 		} else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
101 			   lock->lowner == td) {
102 			nlv = (lv + 1);
103 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
104 				error = 0;
105 				break;
106 			}
107 		} else {
108 			error = EAGAIN;
109 			break;
110 		}
111 	}
112 	return (error);
113 }
114 
115 /*
116  * Obtain a shared lock
117  *
118  * We do not give pending exclusive locks priority over shared locks as
119  * doing so could lead to a deadlock.
120  */
121 void
122 hammer_lock_sh(struct hammer_lock *lock)
123 {
124 	thread_t td = curthread;
125 	u_int lv;
126 	u_int nlv;
127 	const char *ident = "hmrlck";
128 
129 	KKASSERT(lock->refs);
130 	for (;;) {
131 		lv = lock->lockval;
132 
133 		if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
134 			nlv = (lv + 1);
135 			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
136 				break;
137 		} else if (lock->lowner == td) {
138 			/*
139 			 * Disallowed case: drop into the kernel debugger for
140 			 * now.  Continuing proceeds with the exclusive lock.
141 			 */
142 			nlv = (lv + 1);
143 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
144 				if (hammer_debug_critical)
145 					Debugger("hammer_lock_sh: holding ex");
146 				break;
147 			}
148 		} else {
149 			nlv = lv | HAMMER_LOCKF_WANTED;
150 			++hammer_contention_count;
151 			tsleep_interlock(&lock->lockval, 0);
152 			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
153 				tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
154 		}
155 	}
156 }
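
/*
 * Usage sketch: one plausible caller pattern for the shared/exclusive lock
 * primitives above paired with hammer_unlock().  The example function is
 * hypothetical; the lock must already carry a structural reference (see
 * hammer_ref() below) or the KKASSERT(lock->refs) in the acquisition paths
 * will fire.
 */
#if 0
static void
example_read_then_modify(struct hammer_lock *lock)
{
	hammer_lock_sh(lock);		/* shared for read-only work */
	/* ... inspect the structure ... */
	hammer_unlock(lock);

	hammer_lock_ex(lock);		/* exclusive for modifications */
	/* ... modify the structure ... */
	hammer_unlock(lock);
}
#endif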
157 
158 int
159 hammer_lock_sh_try(struct hammer_lock *lock)
160 {
161 	thread_t td = curthread;
162 	u_int lv;
163 	u_int nlv;
164 	int error;
165 
166 	KKASSERT(lock->refs);
167 	for (;;) {
168 		lv = lock->lockval;
169 
170 		if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
171 			nlv = (lv + 1);
172 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
173 				error = 0;
174 				break;
175 			}
176 		} else if (lock->lowner == td) {
177 			/*
178 			 * Disallowed case: drop into the kernel debugger for
179 			 * now.  Continuing proceeds with the exclusive lock.
180 			 */
181 			nlv = (lv + 1);
182 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
183 				if (hammer_debug_critical)
184 					Debugger("hammer_lock_sh: holding ex");
185 				error = 0;
186 				break;
187 			}
188 		} else {
189 			error = EAGAIN;
190 			break;
191 		}
192 	}
193 	return (error);
194 }
195 
196 /*
197  * Upgrade a shared lock to an exclusively held lock.  This function will
198  * return EDEADLK if there is more than one shared holder.
199  *
200  * No error occurs and no action is taken if the lock is already exclusively
201  * held by the caller.  If the lock is not held at all or held exclusively
202  * by someone else, this function will panic.
203  */
204 int
205 hammer_lock_upgrade(struct hammer_lock *lock, int shcount)
206 {
207 	thread_t td = curthread;
208 	u_int lv;
209 	u_int nlv;
210 	int error;
211 
212 	for (;;) {
213 		lv = lock->lockval;
214 
215 		if ((lv & ~HAMMER_LOCKF_WANTED) == shcount) {
216 			nlv = lv | HAMMER_LOCKF_EXCLUSIVE;
217 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
218 				lock->lowner = td;
219 				error = 0;
220 				break;
221 			}
222 		} else if (lv & HAMMER_LOCKF_EXCLUSIVE) {
223 			if (lock->lowner != curthread)
224 				hpanic("illegal state");
225 			error = 0;
226 			break;
227 		} else if ((lv & ~HAMMER_LOCKF_WANTED) == 0) {
228 			hpanic("lock is not held");
229 			/* NOT REACHED */
230 			error = EDEADLK;
231 			break;
232 		} else {
233 			error = EDEADLK;
234 			break;
235 		}
236 	}
237 	return (error);
238 }
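
/*
 * Usage sketch: a caller holding a single shared count can attempt an
 * upgrade and fall back to unlock/relock when EDEADLK reports additional
 * shared holders.  After the fallback any previously validated state must
 * be rechecked by the caller.  The example function is hypothetical.
 */
#if 0
static void
example_upgrade_one_shared(struct hammer_lock *lock)
{
	if (hammer_lock_upgrade(lock, 1) == EDEADLK) {
		/*
		 * Other shared holders exist; drop our shared count and
		 * reacquire exclusively (state may change in between).
		 */
		hammer_unlock(lock);
		hammer_lock_ex(lock);
	}
	/* ... exclusive work ... */
	hammer_lock_downgrade(lock, 1);		/* back to one shared count */
}
#endif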
239 
240 /*
241  * Downgrade an exclusively held lock to a shared lock.
242  */
243 void
244 hammer_lock_downgrade(struct hammer_lock *lock, int shcount)
245 {
246 	thread_t td __debugvar = curthread;
247 	u_int lv;
248 	u_int nlv;
249 
250 	KKASSERT((lock->lockval & ~HAMMER_LOCKF_WANTED) ==
251 		 (HAMMER_LOCKF_EXCLUSIVE | shcount));
252 	KKASSERT(lock->lowner == td);
253 
254 	/*
255 	 * NOTE: Must clear owner before releasing exclusivity
256 	 */
257 	lock->lowner = NULL;
258 
259 	for (;;) {
260 		lv = lock->lockval;
261 		nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
262 		if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
263 			if (lv & HAMMER_LOCKF_WANTED)
264 				wakeup(&lock->lockval);
265 			break;
266 		}
267 	}
268 }
269 
270 void
271 hammer_unlock(struct hammer_lock *lock)
272 {
273 	thread_t td __debugvar = curthread;
274 	u_int lv;
275 	u_int nlv;
276 
277 	lv = lock->lockval;
278 	KKASSERT(lv != 0);
279 	if (lv & HAMMER_LOCKF_EXCLUSIVE)
280 		KKASSERT(lock->lowner == td);
281 
282 	for (;;) {
283 		lv = lock->lockval;
284 		nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
285 		if (nlv > 1) {
286 			nlv = lv - 1;
287 			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
288 				break;
289 		} else if (nlv == 1) {
290 			nlv = 0;
291 			if (lv & HAMMER_LOCKF_EXCLUSIVE)
292 				lock->lowner = NULL;
293 			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
294 				if (lv & HAMMER_LOCKF_WANTED)
295 					wakeup(&lock->lockval);
296 				break;
297 			}
298 		} else {
299 			hpanic("lock %p is not held", lock);
300 		}
301 	}
302 }
303 
304 /*
305  * The calling thread must be holding a shared or exclusive lock.
306  * Returns < 0 if the lock is held shared, and > 0 if held exclusively.
307  */
308 int
309 hammer_lock_status(struct hammer_lock *lock)
310 {
311 	u_int lv = lock->lockval;
312 
313 	if (lv & HAMMER_LOCKF_EXCLUSIVE)
314 		return(1);
315 	else if (lv)
316 		return(-1);
317 	hpanic("lock must be held: %p", lock);
318 }
319 
320 /*
321  * Bump the ref count for a lock (not the excl/share count, but a separate
322  * structural reference count).  The CHECK flag will be set on a 0->1
323  * transition.
324  *
325  * This function does nothing to serialize races between multiple threads.
326  * The caller can interlock it later on to deal with serialization.
327  *
328  * MPSAFE
329  */
330 void
331 hammer_ref(struct hammer_lock *lock)
332 {
333 	u_int lv;
334 	u_int nlv;
335 
336 	for (;;) {
337 		lv = lock->refs;
338 		if ((lv & ~HAMMER_REFS_FLAGS) == 0) {
339 			nlv = (lv + 1) | HAMMER_REFS_CHECK;
340 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
341 				return;
342 		} else {
343 			nlv = (lv + 1);
344 			KKASSERT((int)nlv > 0);
345 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
346 				return;
347 		}
348 	}
349 	/* not reached */
350 }
351 
352 /*
353  * Drop the ref count for a lock (not the excl/share count, but a separate
354  * structural reference count).  The CHECK flag will be cleared on a 1->0
355  * transition.
356  *
357  * This function does nothing to serialize races between multiple threads.
358  *
359  * MPSAFE
360  */
361 void
362 hammer_rel(struct hammer_lock *lock)
363 {
364 	u_int lv;
365 	u_int nlv;
366 
367 	for (;;) {
368 		lv = lock->refs;
369 		if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
370 			nlv = (lv - 1) & ~HAMMER_REFS_CHECK;
371 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
372 				return;
373 		} else {
374 			KKASSERT((int)lv > 0);
375 			nlv = (lv - 1);
376 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
377 				return;
378 		}
379 	}
380 	/* not reached */
381 }
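
/*
 * Usage sketch: the structural reference count managed by hammer_ref() and
 * hammer_rel() is what the KKASSERT(lock->refs) checks in the lock
 * acquisition functions above rely on.  The example function is
 * hypothetical.
 */
#if 0
static void
example_ref_lock_rel(struct hammer_lock *lock)
{
	hammer_ref(lock);		/* keep the structure referenced */
	hammer_lock_sh(lock);		/* legal now that lock->refs != 0 */
	/* ... */
	hammer_unlock(lock);
	hammer_rel(lock);		/* may clear CHECK on 1->0 */
}
#endif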
382 
383 /*
384  * The hammer_*_interlock() and hammer_*_interlock_done() functions are
385  * more sophisticated versions which handle MP transition races and block
386  * when necessary.
387  *
388  * hammer_ref_interlock() bumps the ref-count and conditionally acquires
389  * the interlock for 0->1 transitions or if the CHECK is found to be set.
390  *
391  * In this case 1 is returned, the interlock will be held, and the
392  * CHECK bit will also be set.  Other threads attempting to ref will see
393  * the CHECK bit and block until we clean up.
394  *
395  * 0 is returned for transitions other than 0->1 when the CHECK bit
396  * is not found to be set, or if the function loses the race with another
397  * thread.
398  *
399  * 1 is only returned to one thread and the others will block.
400  * Effectively a 1 indicator means 'someone transitioned 0->1
401  * and you are the first guy to successfully lock it after that, so you
402  * need to check'.  Due to races the ref-count may be greater than 1 upon
403  * return.
404  *
405  * MPSAFE
406  */
407 int
408 hammer_ref_interlock(struct hammer_lock *lock)
409 {
410 	u_int lv;
411 	u_int nlv;
412 
413 	/*
414 	 * Integrated reference count bump, lock, and check, with hot-path.
415 	 *
416 	 * (a) Return 1	(+LOCKED, +CHECK)	0->1 transition
417 	 * (b) Return 0 (-LOCKED, -CHECK)	N->N+1 transition
418 	 * (c) Break out (+CHECK)		Check condition and cannot lock
419 	 * (d) Return 1 (+LOCKED, +CHECK)	Successfully locked
420 	 */
421 	for (;;) {
422 		lv = lock->refs;
423 		if (lv == 0) {
424 			nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
425 			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
426 				lock->rowner = curthread;
427 				return(1);
428 			}
429 		} else {
430 			nlv = (lv + 1);
431 			if ((lv & ~HAMMER_REFS_FLAGS) == 0)
432 				nlv |= HAMMER_REFS_CHECK;
433 			if ((nlv & HAMMER_REFS_CHECK) == 0) {
434 				if (atomic_cmpset_int(&lock->refs, lv, nlv))
435 					return(0);
436 			} else if (lv & HAMMER_REFS_LOCKED) {
437 				/* CHECK also set here */
438 				if (atomic_cmpset_int(&lock->refs, lv, nlv))
439 					break;
440 			} else {
441 				/* CHECK also set here */
442 				nlv |= HAMMER_REFS_LOCKED;
443 				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
444 					lock->rowner = curthread;
445 					return(1);
446 				}
447 			}
448 		}
449 	}
450 
451 	/*
452 	 * Deferred check condition because we were unable to acquire the
453 	 * lock.  We must block until the check condition is cleared due
454 	 * to a race with another thread, or we are able to acquire the
455 	 * lock.
456 	 *
457 	 * (a) Return 0	(-CHECK)		Another thread handled it
458 	 * (b) Return 1 (+LOCKED, +CHECK)	We handled it.
459 	 */
460 	for (;;) {
461 		lv = lock->refs;
462 		if ((lv & HAMMER_REFS_CHECK) == 0)
463 			return(0);
464 		if (lv & HAMMER_REFS_LOCKED) {
465 			tsleep_interlock(&lock->refs, 0);
466 			nlv = (lv | HAMMER_REFS_WANTED);
467 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
468 				tsleep(&lock->refs, PINTERLOCKED, "h1lk", 0);
469 		} else {
470 			/* CHECK also set here */
471 			nlv = lv | HAMMER_REFS_LOCKED;
472 			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
473 				lock->rowner = curthread;
474 				return(1);
475 			}
476 		}
477 	}
478 	/* not reached */
479 }
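
/*
 * Usage sketch: the load-path pattern described above.  A return of 1 means
 * this thread must validate/load the structure and then clear the interlock;
 * a return of 0 means another thread already did.  The load helper and its
 * error handling below are hypothetical.
 */
#if 0
static int
example_load_path(struct hammer_lock *lock)
{
	int error = 0;

	if (hammer_ref_interlock(lock)) {
		/* interlock held, CHECK set: we are responsible for loading */
		error = example_load_backing_data(lock);  /* hypothetical */
		if (error == 0) {
			/* unlock and clear CHECK */
			hammer_ref_interlock_done(lock);
		} else {
			/* drop our ref but stay interlocked for the unwind */
			hammer_rel_interlock(lock, 1);
			hammer_rel_interlock_done(lock, 1);
		}
	}
	return (error);
}
#endif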
480 
481 /*
482  * This is the same as hammer_ref_interlock() but asserts that the
483  * 0->1 transition is always true, thus the lock must have no references
484  * on entry or have CHECK set, and will have one reference with the
485  * interlock held on return.  It must also not be interlocked on entry
486  * by anyone.
487  *
488  * NOTE that CHECK will never be found set when the ref-count is 0.
489  *
490  * 1 is always returned to match the API for hammer_ref_interlock().
491  * This function returns with one ref, the lock held, and the CHECK bit set.
492  */
493 int
494 hammer_ref_interlock_true(struct hammer_lock *lock)
495 {
496 	u_int lv;
497 	u_int nlv;
498 
499 	for (;;) {
500 		lv = lock->refs;
501 
502 		if (lv) {
503 			hpanic("bad lock %p %08x", lock, lock->refs);
504 		}
505 		nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
506 		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
507 			lock->rowner = curthread;
508 			return (1);
509 		}
510 	}
511 }
512 
513 /*
514  * Unlock the interlock acquired by hammer_ref_interlock() and clear the
515  * CHECK flag.  The ref-count remains unchanged.
516  *
517  * This routine is called in the load path when the load succeeds.
518  */
519 void
520 hammer_ref_interlock_done(struct hammer_lock *lock)
521 {
522 	u_int lv;
523 	u_int nlv;
524 
525 	for (;;) {
526 		lv = lock->refs;
527 		nlv = lv & ~HAMMER_REFS_FLAGS;
528 		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
529 			if (lv & HAMMER_REFS_WANTED)
530 				wakeup(&lock->refs);
531 			break;
532 		}
533 	}
534 }
535 
536 /*
537  * hammer_rel_interlock() works a bit differently in that it must
538  * acquire the lock in tandem with a 1->0 transition.  CHECK is
539  * not used.
540  *
541  * 1 is returned on 1->0 transitions with the lock held on return
542  * and 0 is returned otherwise with the lock not held.
543  *
544  * It is important to note that the refs are not stable and may
545  * increase while we hold the lock; the 1 indication only means
546  * that we transitioned 1->0, not necessarily that we stayed at 0.
547  *
548  * Another thread bumping refs while we hold the lock will set CHECK,
549  * causing one of the competing hammer_ref_interlock() calls to
550  * return 1 after we release our lock.
551  *
552  * MPSAFE
553  */
554 int
555 hammer_rel_interlock(struct hammer_lock *lock, int locked)
556 {
557 	u_int lv;
558 	u_int nlv;
559 
560 	/*
561 	 * In locked mode (failure/unload path) we release the
562 	 * ref-count but leave it locked.
563 	 */
564 	if (locked) {
565 		hammer_rel(lock);
566 		return(1);
567 	}
568 
569 	/*
570 	 * Integrated reference count drop with LOCKED, plus the hot-path
571 	 * returns.
572 	 */
573 	for (;;) {
574 		lv = lock->refs;
575 
576 		if (lv == 1) {
577 			nlv = 0 | HAMMER_REFS_LOCKED;
578 			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
579 				lock->rowner = curthread;
580 				return(1);
581 			}
582 		} else if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
583 			if ((lv & HAMMER_REFS_LOCKED) == 0) {
584 				nlv = (lv - 1) | HAMMER_REFS_LOCKED;
585 				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
586 					lock->rowner = curthread;
587 					return(1);
588 				}
589 			} else {
590 				nlv = lv | HAMMER_REFS_WANTED;
591 				tsleep_interlock(&lock->refs, 0);
592 				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
593 					tsleep(&lock->refs, PINTERLOCKED,
594 					       "h0lk", 0);
595 				}
596 			}
597 		} else {
598 			nlv = (lv - 1);
599 			KKASSERT((int)nlv >= 0);
600 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
601 				return(0);
602 		}
603 	}
604 	/* not reached */
605 }
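
/*
 * Usage sketch: the 1->0 unload pattern.  A return of 1 means this thread
 * performed the final release and holds the interlock, so it may tear the
 * structure down before releasing it with hammer_rel_interlock_done().
 * The teardown helper is hypothetical.
 */
#if 0
static void
example_unload_path(struct hammer_lock *lock)
{
	if (hammer_rel_interlock(lock, 0)) {
		/*
		 * We transitioned 1->0 and hold the interlock.  A racing
		 * hammer_ref_interlock() may bump refs again; that thread
		 * will observe CHECK once we release.
		 */
		example_teardown_backing_data(lock);	/* hypothetical */
		hammer_rel_interlock_done(lock, 0);
	}
}
#endif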
606 
607 /*
608  * Unlock the interlock acquired by hammer_rel_interlock().
609  *
610  * If orig_locked is non-zero the interlock was originally held prior to
611  * the hammer_rel_interlock() call and passed through to us.  In this
612  * case we want to retain the CHECK error state if not transitioning
613  * to 0.
614  *
615  * The code is the same either way so we do not have to conditionalize
616  * on orig_locked.
617  */
618 void
619 hammer_rel_interlock_done(struct hammer_lock *lock, int orig_locked __unused)
620 {
621 	u_int lv;
622 	u_int nlv;
623 
624 	for (;;) {
625 		lv = lock->refs;
626 		nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);
627 		if ((lv & ~HAMMER_REFS_FLAGS) == 0)
628 			nlv &= ~HAMMER_REFS_CHECK;
629 		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
630 			if (lv & HAMMER_REFS_WANTED)
631 				wakeup(&lock->refs);
632 			break;
633 		}
634 	}
635 }
636 
637 /*
638  * Acquire the interlock on lock->refs.
639  *
640  * Return 1 if CHECK is currently set.  Note that CHECK will not
641  * be set if the reference count is 0, but can get set if this function
642  * is preceded by, say, hammer_ref(), or through races with other
643  * threads.  The return value allows the caller to use the same logic
644  * as hammer_ref_interlock().
645  *
646  * MPSAFE
647  */
648 int
649 hammer_get_interlock(struct hammer_lock *lock)
650 {
651 	u_int lv;
652 	u_int nlv;
653 
654 	for (;;) {
655 		lv = lock->refs;
656 		if (lv & HAMMER_REFS_LOCKED) {
657 			nlv = lv | HAMMER_REFS_WANTED;
658 			tsleep_interlock(&lock->refs, 0);
659 			if (atomic_cmpset_int(&lock->refs, lv, nlv))
660 				tsleep(&lock->refs, PINTERLOCKED, "hilk", 0);
661 		} else {
662 			nlv = (lv | HAMMER_REFS_LOCKED);
663 			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
664 				lock->rowner = curthread;
665 				return((lv & HAMMER_REFS_CHECK) ? 1 : 0);
666 			}
667 		}
668 	}
669 }
670 
671 /*
672  * Attempt to acquire the interlock and expect 0 refs.  Used by the buffer
673  * cache callback code to disassociate or lock the bufs related to HAMMER
674  * structures.
675  *
676  * During teardown the related bp will be acquired by hammer_io_release()
677  * which interlocks our test.
678  *
679  * Returns non-zero on success, zero on failure.
680  */
681 int
682 hammer_try_interlock_norefs(struct hammer_lock *lock)
683 {
684 	u_int lv;
685 	u_int nlv;
686 
687 	for (;;) {
688 		lv = lock->refs;
689 		if (lv == 0) {
690 			nlv = lv | HAMMER_REFS_LOCKED;
691 			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
692 				lock->rowner = curthread;
693 				return(1);
694 			}
695 		} else {
696 			return(0);
697 		}
698 	}
699 	/* not reached */
700 }
701 
702 /*
703  * Release the interlock on lock->refs.  This function will set
704  * CHECK if the ref count is non-zero and error is non-zero, and clear
705  * CHECK otherwise.
706  *
707  * MPSAFE
708  */
709 void
710 hammer_put_interlock(struct hammer_lock *lock, int error)
711 {
712 	u_int lv;
713 	u_int nlv;
714 
715 	for (;;) {
716 		lv = lock->refs;
717 		KKASSERT(lv & HAMMER_REFS_LOCKED);
718 		nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);
719 
720 		if ((nlv & ~HAMMER_REFS_FLAGS) == 0 || error == 0)
721 			nlv &= ~HAMMER_REFS_CHECK;
722 		else
723 			nlv |= HAMMER_REFS_CHECK;
724 
725 		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
726 			if (lv & HAMMER_REFS_WANTED)
727 				wakeup(&lock->refs);
728 			return;
729 		}
730 	}
731 }
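
/*
 * Usage sketch: pairing hammer_get_interlock() with hammer_put_interlock().
 * A return of 1 indicates CHECK was set and the structure should be
 * revalidated; the error handed to hammer_put_interlock() decides whether
 * CHECK is re-armed for the next holder.  The revalidation helper is
 * hypothetical.
 */
#if 0
static void
example_get_put_interlock(struct hammer_lock *lock)
{
	int error = 0;

	if (hammer_get_interlock(lock))
		error = example_revalidate(lock);	/* hypothetical */
	/* non-zero error leaves CHECK set while refs remain non-zero */
	hammer_put_interlock(lock, error);
}
#endif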
732 
733 /*
734  * The sync_lock must be held when doing any modifying operations on
735  * meta-data.  It does not have to be held when modifying non-meta-data buffers
736  * (backend or frontend).
737  *
738  * The flusher holds the lock exclusively while all other consumers hold it
739  * shared.  All modifying operations made while holding the lock are atomic
740  * in that they will be made part of the same flush group.
741  *
742  * Due to the atomicity requirement, deadlock recovery code CANNOT release the
743  * sync lock, nor can we give pending exclusive sync locks priority over
744  * a shared sync lock as this could lead to a 3-way deadlock.
745  */
746 void
747 hammer_sync_lock_ex(hammer_transaction_t trans)
748 {
749 	++trans->sync_lock_refs;
750 	hammer_lock_ex(&trans->hmp->sync_lock);
751 }
752 
753 void
754 hammer_sync_lock_sh(hammer_transaction_t trans)
755 {
756 	++trans->sync_lock_refs;
757 	hammer_lock_sh(&trans->hmp->sync_lock);
758 }
759 
760 int
761 hammer_sync_lock_sh_try(hammer_transaction_t trans)
762 {
763 	int error;
764 
765 	++trans->sync_lock_refs;
766 	if ((error = hammer_lock_sh_try(&trans->hmp->sync_lock)) != 0)
767 		--trans->sync_lock_refs;
768 	return (error);
769 }
770 
771 void
772 hammer_sync_unlock(hammer_transaction_t trans)
773 {
774 	--trans->sync_lock_refs;
775 	hammer_unlock(&trans->hmp->sync_lock);
776 }
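
/*
 * Usage sketch: meta-data modifications are bracketed by the shared sync
 * lock so they become part of the flush group being assembled while the
 * flusher holds the lock exclusively.  The modification helper is
 * hypothetical.
 */
#if 0
static void
example_modify_meta_data(hammer_transaction_t trans)
{
	hammer_sync_lock_sh(trans);
	example_modify_btree_element(trans);	/* hypothetical change */
	hammer_sync_unlock(trans);
}
#endif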
777 
778 /*
779  * Misc
780  */
781 u_int32_t
782 hammer_to_unix_xid(uuid_t *uuid)
783 {
784 	return(*(u_int32_t *)&uuid->node[2]);
785 }
786 
787 void
788 hammer_guid_to_uuid(uuid_t *uuid, u_int32_t guid)
789 {
790 	bzero(uuid, sizeof(*uuid));
791 	*(u_int32_t *)&uuid->node[2] = guid;
792 }
793 
794 void
795 hammer_time_to_timespec(u_int64_t xtime, struct timespec *ts)
796 {
797 	ts->tv_sec = (unsigned long)(xtime / 1000000);
798 	ts->tv_nsec = (unsigned int)(xtime % 1000000) * 1000L;
799 }
800 
801 u_int64_t
802 hammer_timespec_to_time(struct timespec *ts)
803 {
804 	u_int64_t xtime;
805 
806 	xtime = (unsigned)(ts->tv_nsec / 1000) +
807 		(unsigned long)ts->tv_sec * 1000000ULL;
808 	return(xtime);
809 }
810 
811 
812 /*
813  * Convert a HAMMER filesystem object type to a vnode type
814  */
815 enum vtype
816 hammer_get_vnode_type(u_int8_t obj_type)
817 {
818 	switch(obj_type) {
819 	case HAMMER_OBJTYPE_DIRECTORY:
820 		return(VDIR);
821 	case HAMMER_OBJTYPE_REGFILE:
822 		return(VREG);
823 	case HAMMER_OBJTYPE_DBFILE:
824 		return(VDATABASE);
825 	case HAMMER_OBJTYPE_FIFO:
826 		return(VFIFO);
827 	case HAMMER_OBJTYPE_SOCKET:
828 		return(VSOCK);
829 	case HAMMER_OBJTYPE_CDEV:
830 		return(VCHR);
831 	case HAMMER_OBJTYPE_BDEV:
832 		return(VBLK);
833 	case HAMMER_OBJTYPE_SOFTLINK:
834 		return(VLNK);
835 	default:
836 		return(VBAD);
837 	}
838 	/* not reached */
839 }
840 
841 int
842 hammer_get_dtype(u_int8_t obj_type)
843 {
844 	switch(obj_type) {
845 	case HAMMER_OBJTYPE_DIRECTORY:
846 		return(DT_DIR);
847 	case HAMMER_OBJTYPE_REGFILE:
848 		return(DT_REG);
849 	case HAMMER_OBJTYPE_DBFILE:
850 		return(DT_DBF);
851 	case HAMMER_OBJTYPE_FIFO:
852 		return(DT_FIFO);
853 	case HAMMER_OBJTYPE_SOCKET:
854 		return(DT_SOCK);
855 	case HAMMER_OBJTYPE_CDEV:
856 		return(DT_CHR);
857 	case HAMMER_OBJTYPE_BDEV:
858 		return(DT_BLK);
859 	case HAMMER_OBJTYPE_SOFTLINK:
860 		return(DT_LNK);
861 	default:
862 		return(DT_UNKNOWN);
863 	}
864 	/* not reached */
865 }
866 
867 u_int8_t
868 hammer_get_obj_type(enum vtype vtype)
869 {
870 	switch(vtype) {
871 	case VDIR:
872 		return(HAMMER_OBJTYPE_DIRECTORY);
873 	case VREG:
874 		return(HAMMER_OBJTYPE_REGFILE);
875 	case VDATABASE:
876 		return(HAMMER_OBJTYPE_DBFILE);
877 	case VFIFO:
878 		return(HAMMER_OBJTYPE_FIFO);
879 	case VSOCK:
880 		return(HAMMER_OBJTYPE_SOCKET);
881 	case VCHR:
882 		return(HAMMER_OBJTYPE_CDEV);
883 	case VBLK:
884 		return(HAMMER_OBJTYPE_BDEV);
885 	case VLNK:
886 		return(HAMMER_OBJTYPE_SOFTLINK);
887 	default:
888 		return(HAMMER_OBJTYPE_UNKNOWN);
889 	}
890 	/* not reached */
891 }
892 
893 /*
894  * Return flags for hammer_delete_at_cursor()
895  */
896 int
897 hammer_nohistory(hammer_inode_t ip)
898 {
899 	if (ip->hmp->hflags & HMNT_NOHISTORY)
900 		return(HAMMER_DELETE_DESTROY);
901 	if (ip->ino_data.uflags & (SF_NOHISTORY|UF_NOHISTORY))
902 		return(HAMMER_DELETE_DESTROY);
903 	return(0);
904 }
905 
906 /*
907  * ALGORITHM VERSION 0:
908  *	Return a namekey hash.   The 64 bit namekey hash consists of a 32 bit
909  *	crc in the upper 32 bits and 0 in the lower 32 bits.  The caller will
910  *	use the low 32 bits to generate a unique key and will scan all entries
911  *	with the same upper 32 bits when issuing a lookup.
912  *
913  *	0hhhhhhhhhhhhhhh hhhhhhhhhhhhhhhh 0000000000000000 0000000000000000
914  *
915  * ALGORITHM VERSION 1:
916  *
917  *	This algorithm breaks the filename down into separate 32-bit crcs
918  *	for each filename segment separated by a special character (dot,
919  *	dash, underscore, or tilde).  The CRCs are then added together.
920  *	This accommodates temporary names.  A full-filename 16 bit crc is
921  *	also generated to deal with degenerate conditions.
922  *
923  *	The algorithm is designed to handle create/rename situations such
924  *	that a create with an extension followed by a rename without the
925  *	extension only shifts the key space rather than randomizing it.
926  *
927  *	NOTE: The inode allocator cache can only match 10 bits so we do
928  *	      not really have any room for a partial sorted name, and
929  *	      numbers don't sort well in that situation anyway.
930  *
931  *	0mmmmmmmmmmmmmmm mmmmmmmmmmmmmmmm llllllllllllllll 0000000000000000
932  *
933  *
934  * We strip bit 63 in order to provide a positive key; this way a seek
935  * offset of 0 will represent the base of the directory.
936  *
937  * We usually strip bit 0 (set it to 0) in order to provide a consistent
938  * iteration space for collisions.
939  *
940  * This function can never return 0.  We use the MSB-0 space to synthesize
941  * artificial directory entries such as "." and "..".
942  */
943 int64_t
944 hammer_directory_namekey(hammer_inode_t dip, const void *name, int len,
945 			 u_int32_t *max_iterationsp)
946 {
947 	const char *aname = name;
948 	int32_t crcx;
949 	int64_t key;
950 	int i;
951 	int j;
952 
953 	switch (dip->ino_data.cap_flags & HAMMER_INODE_CAP_DIRHASH_MASK) {
954 	case HAMMER_INODE_CAP_DIRHASH_ALG0:
955 		/*
956 		 * Original algorithm
957 		 */
958 		key = (int64_t)(crc32(aname, len) & 0x7FFFFFFF) << 32;
959 		if (key == 0)
960 			key |= 0x100000000LL;
961 		*max_iterationsp = 0xFFFFFFFFU;
962 		break;
963 	case HAMMER_INODE_CAP_DIRHASH_ALG1:
964 		/*
965 		 * Filesystem version 6 or better will create directories
966 		 * using the ALG1 dirhash.  This hash breaks the filename
967 		 * up into domains separated by special characters and
968 		 * hashes each domain independently.
969 		 *
970 		 * We also do a simple sub-sort using the first character
971 		 * of the filename in the top 5-bits.
972 		 */
973 		key = 0;
974 
975 		/*
976 		 * m32
977 		 */
978 		crcx = 0;
979 		for (i = j = 0; i < len; ++i) {
980 			if (aname[i] == '.' ||
981 			    aname[i] == '-' ||
982 			    aname[i] == '_' ||
983 			    aname[i] == '~') {
984 				if (i != j)
985 					crcx += crc32(aname + j, i - j);
986 				j = i + 1;
987 			}
988 		}
989 		if (i != j)
990 			crcx += crc32(aname + j, i - j);
991 
992 #if 0
993 		/*
994 		 * xor top 5 bits 0mmmm into low bits and steal the top 5
995 		 * bits as a semi sub sort using the first character of
996 		 * the filename.  bit 63 is always left as 0 so directory
997 		 * keys are positive numbers.
998 		 */
999 		crcx ^= (uint32_t)crcx >> (32 - 5);
1000 		crcx = (crcx & 0x07FFFFFF) | ((aname[0] & 0x0F) << (32 - 5));
1001 #endif
1002 		crcx &= 0x7FFFFFFFU;
1003 
1004 		key |= (uint64_t)crcx << 32;
1005 
1006 		/*
1007 		 * l16 - crc of entire filename
1008 		 *
1009 		 * This crc reduces degenerate hash collision conditions
1010 		 */
1011 		crcx = crc32(aname, len);
1012 		crcx = crcx ^ (crcx << 16);
1013 		key |= crcx & 0xFFFF0000U;
1014 
1015 		/*
1016 		 * Cleanup
1017 		 */
1018 		if ((key & 0xFFFFFFFF00000000LL) == 0)
1019 			key |= 0x100000000LL;
1020 		if (hammer_debug_general & 0x0400) {
1021 			hdkprintf("0x%016llx %*.*s\n",
1022 				(long long)key, len, len, aname);
1023 		}
1024 		*max_iterationsp = 0x00FFFFFF;
1025 		break;
1026 	case HAMMER_INODE_CAP_DIRHASH_ALG2:
1027 	case HAMMER_INODE_CAP_DIRHASH_ALG3:
1028 	default:
1029 		key = 0;			/* compiler warning */
1030 		*max_iterationsp = 1;		/* sanity */
1031 		hpanic("bad algorithm %p", dip);
1032 		break;
1033 	}
1034 	return(key);
1035 }
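
/*
 * Usage sketch: a lookup consumes the namekey by scanning the iteration
 * space returned via *max_iterationsp for collisions.  The output
 * parameters here are schematic; real callers feed the range into a
 * B-Tree cursor.
 */
#if 0
static void
example_namekey_range(hammer_inode_t dip, const char *name, int len,
		      int64_t *key_begp, int64_t *key_endp)
{
	u_int32_t max_iterations;
	int64_t namekey;

	namekey = hammer_directory_namekey(dip, name, len, &max_iterations);
	*key_begp = namekey;				/* first candidate key */
	*key_endp = namekey + max_iterations;		/* last candidate key */
}
#endif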
1036 
1037 /*
1038  * Convert string after @@ (@@ not included) to TID.  Returns 0 on success,
1039  * EINVAL on failure.
1040  *
1041  * If this function fails *ispfs, *tidp, and *localizationp will not
1042  * be modified.
1043  */
1044 int
1045 hammer_str_to_tid(const char *str, int *ispfsp,
1046 		  hammer_tid_t *tidp, u_int32_t *localizationp)
1047 {
1048 	hammer_tid_t tid;
1049 	u_int32_t localization;
1050 	char *ptr;
1051 	int ispfs;
1052 	int n;
1053 
1054 	/*
1055 	 * Forms allowed for TID:  "0x%016llx"
1056 	 *			   "-1"
1057 	 */
1058 	tid = strtouq(str, &ptr, 0);
1059 	n = ptr - str;
1060 	if (n == 2 && str[0] == '-' && str[1] == '1') {
1061 		/* ok */
1062 	} else if (n == 18 && str[0] == '0' && (str[1] | 0x20) == 'x') {
1063 		/* ok */
1064 	} else {
1065 		return(EINVAL);
1066 	}
1067 
1068 	/*
1069 	 * Forms allowed for PFS:  ":%05d"  (i.e. "...:0" would be illegal).
1070 	 */
1071 	str = ptr;
1072 	if (*str == ':') {
1073 		localization = strtoul(str + 1, &ptr, 10) << 16;
1074 		if (ptr - str != 6)
1075 			return(EINVAL);
1076 		str = ptr;
1077 		ispfs = 1;
1078 	} else {
1079 		localization = *localizationp;
1080 		ispfs = 0;
1081 	}
1082 
1083 	/*
1084 	 * Any trailing junk invalidates special extension handling.
1085 	 */
1086 	if (*str)
1087 		return(EINVAL);
1088 	*tidp = tid;
1089 	*localizationp = localization;
1090 	*ispfsp = ispfs;
1091 	return(0);
1092 }
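
/*
 * Usage sketch: parsing the snapshot extension that follows "@@" in a
 * HAMMER path name.  The literal string and the default localization are
 * examples only.
 */
#if 0
static void
example_parse_snapshot_extension(void)
{
	hammer_tid_t tid;
	u_int32_t localization = 0;	/* default used when no ":%05d" suffix */
	int ispfs;

	/* e.g. the extension of "file@@0x0000000106a8ba00:00001" */
	if (hammer_str_to_tid("0x0000000106a8ba00:00001", &ispfs,
			      &tid, &localization) == 0) {
		/* ispfs == 1, tid == 0x0000000106a8ba00ULL,
		   localization == 1 << 16 */
	}
}
#endif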
1093 
1094 void
1095 hammer_crc_set_blockmap(hammer_blockmap_t blockmap)
1096 {
1097 	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
1098 }
1099 
1100 void
1101 hammer_crc_set_volume(hammer_volume_ondisk_t ondisk)
1102 {
1103 	ondisk->vol_crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
1104 			  crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
1105 }
1106 
1107 int
1108 hammer_crc_test_blockmap(hammer_blockmap_t blockmap)
1109 {
1110 	hammer_crc_t crc;
1111 
1112 	crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
1113 	return (blockmap->entry_crc == crc);
1114 }
1115 
1116 int
1117 hammer_crc_test_volume(hammer_volume_ondisk_t ondisk)
1118 {
1119 	hammer_crc_t crc;
1120 
1121 	crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
1122 	      crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
1123 	return (ondisk->vol_crc == crc);
1124 }
1125 
1126 int
1127 hammer_crc_test_btree(hammer_node_ondisk_t ondisk)
1128 {
1129 	hammer_crc_t crc;
1130 
1131 	crc = crc32(&ondisk->crc + 1, HAMMER_BTREE_CRCSIZE);
1132 	return (ondisk->crc == crc);
1133 }
1134 
1135 /*
1136  * Test or set the leaf->data_crc field.  Deal with any special cases given
1137  * a generic B-Tree leaf element and its data.
1138  *
1139  * NOTE: Inode-data: the atime and mtime fields are not CRCd, allowing them
1140  *       to be updated in-place.
1141  */
1142 int
1143 hammer_crc_test_leaf(void *data, hammer_btree_leaf_elm_t leaf)
1144 {
1145 	hammer_crc_t crc;
1146 
1147 	if (leaf->data_len == 0) {
1148 		crc = 0;
1149 	} else {
1150 		switch(leaf->base.rec_type) {
1151 		case HAMMER_RECTYPE_INODE:
1152 			if (leaf->data_len != sizeof(struct hammer_inode_data))
1153 				return(0);
1154 			crc = crc32(data, HAMMER_INODE_CRCSIZE);
1155 			break;
1156 		default:
1157 			crc = crc32(data, leaf->data_len);
1158 			break;
1159 		}
1160 	}
1161 	return (leaf->data_crc == crc);
1162 }
1163 
1164 void
1165 hammer_crc_set_leaf(void *data, hammer_btree_leaf_elm_t leaf)
1166 {
1167 	if (leaf->data_len == 0) {
1168 		leaf->data_crc = 0;
1169 	} else {
1170 		switch(leaf->base.rec_type) {
1171 		case HAMMER_RECTYPE_INODE:
1172 			KKASSERT(leaf->data_len ==
1173 				  sizeof(struct hammer_inode_data));
1174 			leaf->data_crc = crc32(data, HAMMER_INODE_CRCSIZE);
1175 			break;
1176 		default:
1177 			leaf->data_crc = crc32(data, leaf->data_len);
1178 			break;
1179 		}
1180 	}
1181 }
1182 
1183 /*
1184  * Return the block size at the specified file offset.
1185  */
1186 int
1187 hammer_blocksize(int64_t file_offset)
1188 {
1189 	if (file_offset < HAMMER_XDEMARC)
1190 		return(HAMMER_BUFSIZE);
1191 	else
1192 		return(HAMMER_XBUFSIZE);
1193 }
1194 
1195 int
1196 hammer_blockoff(int64_t file_offset)
1197 {
1198 	if (file_offset < HAMMER_XDEMARC)
1199 		return((int)file_offset & HAMMER_BUFMASK);
1200 	else
1201 		return((int)file_offset & HAMMER_XBUFMASK);
1202 }
1203 
1204 /*
1205  * Return the demarcation point between the two offsets where
1206  * the block size changes.
1207  */
1208 int64_t
1209 hammer_blockdemarc(int64_t file_offset1, int64_t file_offset2)
1210 {
1211 	if (file_offset1 < HAMMER_XDEMARC) {
1212 		if (file_offset2 <= HAMMER_XDEMARC)
1213 			return(file_offset2);
1214 		return(HAMMER_XDEMARC);
1215 	}
1216 	hpanic("illegal range %lld %lld",
1217 	      (long long)file_offset1, (long long)file_offset2);
1218 }
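
/*
 * Usage sketch: splitting an I/O range at the small-buffer/large-buffer
 * boundary so each piece uses a uniform block size.  The I/O helper is
 * hypothetical.
 */
#if 0
static void
example_split_range(int64_t offset, int64_t end_offset)
{
	int64_t demarc;

	if (offset < HAMMER_XDEMARC && end_offset > HAMMER_XDEMARC) {
		/* range straddles the boundary; do the small-block part first */
		demarc = hammer_blockdemarc(offset, end_offset);
		example_issue_io(offset, demarc - offset,	/* hypothetical */
				 hammer_blocksize(offset));
		offset = demarc;
	}
	example_issue_io(offset, end_offset - offset,		/* hypothetical */
			 hammer_blocksize(offset));
}
#endif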
1219 
1220 udev_t
1221 hammer_fsid_to_udev(uuid_t *uuid)
1222 {
1223 	u_int32_t crc;
1224 
1225 	crc = crc32(uuid, sizeof(*uuid));
1226 	return((udev_t)crc);
1227 }
1228 
1229