xref: /dragonfly/sys/vfs/hammer2/hammer2_inode.c (revision e314d7e2)
1 /*
2  * Copyright (c) 2011-2014 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 #include <sys/cdefs.h>
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/types.h>
39 #include <sys/lock.h>
40 #include <sys/uuid.h>
41 
42 #include "hammer2.h"
43 
44 #define INODE_DEBUG	0
45 
46 static void hammer2_inode_move_to_hidden(hammer2_trans_t *trans,
47 					 hammer2_cluster_t **cparentp,
48 					 hammer2_cluster_t **clusterp,
49 					 hammer2_tid_t inum);
50 
51 RB_GENERATE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp,
52 	     hammer2_tid_t, inum);
53 
54 int
55 hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2)
56 {
57 	if (ip1->inum < ip2->inum)
58 		return(-1);
59 	if (ip1->inum > ip2->inum)
60 		return(1);
61 	return(0);
62 }
63 
64 /*
65  * HAMMER2 inode locks
66  *
67  * HAMMER2 offers shared locks and exclusive locks on inodes.
68  *
69  * The inode locking function locks the inode itself, resolves any stale
70  * chains in the inode's cluster, and allocates a fresh copy of the
71  * cluster with 1 ref and all the underlying chains locked.  Duplication
72  * races are handled by this function.
73  *
74  * ip->cluster will be stable while the inode is locked.
75  *
76  * NOTE: We don't combine the inode/chain lock because putting away an
77  *       inode would otherwise confuse multiple lock holders of the inode.
78  *
79  * NOTE: In-memory inodes always point to hardlink targets (the actual file),
80  *	 and never point to a hardlink pointer.
81  */
82 hammer2_cluster_t *
83 hammer2_inode_lock_ex(hammer2_inode_t *ip)
84 {
85 	const hammer2_inode_data_t *ipdata;
86 	hammer2_cluster_t *cluster;
87 	hammer2_chain_t *chain;
88 	int i;
89 
90 	hammer2_inode_ref(ip);
91 	ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);
92 	cluster = hammer2_cluster_copy(&ip->cluster,
93 				       HAMMER2_CLUSTER_COPY_NOCHAINS);
94 
95 	ip->cluster.focus = NULL;
96 	cluster->focus = NULL;
97 
98 	for (i = 0; i < cluster->nchains; ++i) {
99 		chain = ip->cluster.array[i];
100 		if (chain == NULL) {
101 			kprintf("inode_lock: %p: missing chain\n", ip);
102 			continue;
103 		}
104 
105 		hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
106 		cluster->array[i] = chain;
107 		if (cluster->focus == NULL)
108 			cluster->focus = chain;
109 		if (ip->cluster.focus == NULL)
110 			ip->cluster.focus = chain;
111 	}
112 
113 	/*
114 	 * Returned cluster must resolve hardlink pointers
115 	 */
116 	ipdata = &hammer2_cluster_data(cluster)->ipdata;
117 	KKASSERT(ipdata->type != HAMMER2_OBJTYPE_HARDLINK);
118 	/*
119 	if (ipdata->type == HAMMER2_OBJTYPE_HARDLINK &&
120 	    (cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0) {
121 		error = hammer2_hardlink_find(ip->pip, NULL, cluster);
122 		KKASSERT(error == 0);
123 	}
124 	*/
125 
126 	return (cluster);
127 }
128 
129 void
130 hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_cluster_t *cluster)
131 {
132 	if (cluster)
133 		hammer2_cluster_unlock(cluster);
134 	ccms_thread_unlock(&ip->topo_cst);
135 	hammer2_inode_drop(ip);
136 }
137 
138 /*
139  * NOTE: We don't combine the inode/chain lock because putting away an
140  *       inode would otherwise confuse multiple lock holders of the inode.
141  *
142  *	 Shared locks are especially sensitive to having too many shared
143  *	 lock counts (from the same thread) on certain paths which might
144  *	 need to upgrade them.  Only one count of a shared lock can be
145  *	 upgraded.
146  */
147 hammer2_cluster_t *
148 hammer2_inode_lock_sh(hammer2_inode_t *ip)
149 {
150 	const hammer2_inode_data_t *ipdata;
151 	hammer2_cluster_t *cluster;
152 	hammer2_chain_t *chain;
153 	int i;
154 
155 	hammer2_inode_ref(ip);
156 	cluster = hammer2_cluster_copy(&ip->cluster,
157 				       HAMMER2_CLUSTER_COPY_NOCHAINS);
158 	ccms_thread_lock(&ip->topo_cst, CCMS_STATE_SHARED);
159 
160 	cluster->focus = NULL;
161 
162 	for (i = 0; i < cluster->nchains; ++i) {
163 		chain = ip->cluster.array[i];
164 
165 		if (chain == NULL) {
166 			kprintf("inode_lock: %p: missing chain\n", ip);
167 			continue;
168 		}
169 
170 		hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS |
171 					  HAMMER2_RESOLVE_SHARED);
172 		cluster->array[i] = chain;
173 		if (cluster->focus == NULL)
174 			cluster->focus = chain;
175 	}
176 
177 	/*
178 	 * Returned cluster must resolve hardlink pointers
179 	 */
180 	ipdata = &hammer2_cluster_data(cluster)->ipdata;
181 	KKASSERT(ipdata->type != HAMMER2_OBJTYPE_HARDLINK);
182 	/*
183 	if (ipdata->type == HAMMER2_OBJTYPE_HARDLINK &&
184 	    (cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0) {
185 		error = hammer2_hardlink_find(ip->pip, NULL, cluster);
186 		KKASSERT(error == 0);
187 	}
188 	*/
189 
190 	return (cluster);
191 }
192 
193 void
194 hammer2_inode_unlock_sh(hammer2_inode_t *ip, hammer2_cluster_t *cluster)
195 {
196 	if (cluster)
197 		hammer2_cluster_unlock(cluster);
198 	ccms_thread_unlock(&ip->topo_cst);
199 	hammer2_inode_drop(ip);
200 }
201 
202 ccms_state_t
203 hammer2_inode_lock_temp_release(hammer2_inode_t *ip)
204 {
205 	return(ccms_thread_lock_temp_release(&ip->topo_cst));
206 }
207 
208 void
209 hammer2_inode_lock_temp_restore(hammer2_inode_t *ip, ccms_state_t ostate)
210 {
211 	ccms_thread_lock_temp_restore(&ip->topo_cst, ostate);
212 }
213 
214 ccms_state_t
215 hammer2_inode_lock_upgrade(hammer2_inode_t *ip)
216 {
217 	return(ccms_thread_lock_upgrade(&ip->topo_cst));
218 }
219 
220 void
221 hammer2_inode_lock_downgrade(hammer2_inode_t *ip, ccms_state_t ostate)
222 {
223 	ccms_thread_lock_downgrade(&ip->topo_cst, ostate);
224 }
225 
226 /*
227  * Lookup an inode by inode number
228  */
229 hammer2_inode_t *
230 hammer2_inode_lookup(hammer2_pfsmount_t *pmp, hammer2_tid_t inum)
231 {
232 	hammer2_inode_t *ip;
233 
234 	KKASSERT(pmp);
235 	if (pmp->spmp_hmp) {
236 		ip = NULL;
237 	} else {
238 		spin_lock(&pmp->inum_spin);
239 		ip = RB_LOOKUP(hammer2_inode_tree, &pmp->inum_tree, inum);
240 		if (ip)
241 			hammer2_inode_ref(ip);
242 		spin_unlock(&pmp->inum_spin);
243 	}
244 	return(ip);
245 }
246 
247 /*
248  * Adding a ref to an inode is only legal if the inode already has at least
249  * one ref.
250  *
251  * (can be called with spinlock held)
252  */
253 void
254 hammer2_inode_ref(hammer2_inode_t *ip)
255 {
256 	atomic_add_int(&ip->refs, 1);
257 }
258 
259 /*
260  * Drop an inode reference, freeing the inode when the last reference goes
261  * away.
262  */
263 void
264 hammer2_inode_drop(hammer2_inode_t *ip)
265 {
266 	hammer2_pfsmount_t *pmp;
267 	hammer2_inode_t *pip;
268 	u_int refs;
269 
270 	while (ip) {
271 		refs = ip->refs;
272 		cpu_ccfence();
273 		if (refs == 1) {
274 			/*
275 			 * Transition to zero, must interlock with
276 			 * the inode inumber lookup tree (if applicable).
277 			 */
278 			pmp = ip->pmp;
279 			KKASSERT(pmp);
280 			spin_lock(&pmp->inum_spin);
281 
282 			if (atomic_cmpset_int(&ip->refs, 1, 0)) {
283 				KKASSERT(ip->topo_cst.count == 0);
284 				if (ip->flags & HAMMER2_INODE_ONRBTREE) {
285 					atomic_clear_int(&ip->flags,
286 						     HAMMER2_INODE_ONRBTREE);
287 					RB_REMOVE(hammer2_inode_tree,
288 						  &pmp->inum_tree, ip);
289 				}
290 				spin_unlock(&pmp->inum_spin);
291 
292 				pip = ip->pip;
293 				ip->pip = NULL;
294 				ip->pmp = NULL;
295 
296 				/*
297 				 * Cleaning out ip->cluster isn't entirely
298 				 * trivial.
299 				 */
300 				hammer2_inode_repoint(ip, NULL, NULL);
301 
302 				/*
303 				 * We have to drop pip (if non-NULL) to
304 				 * dispose of our implied reference from
305 				 * ip->pip.  We can simply loop on it.
306 				 */
307 				kfree(ip, pmp->minode);
308 				atomic_add_long(&pmp->inmem_inodes, -1);
309 				ip = pip;
310 				/* continue with pip (can be NULL) */
311 			} else {
312 				spin_unlock(&ip->pmp->inum_spin);
313 			}
314 		} else {
315 			/*
316 			 * Non zero transition
317 			 */
318 			if (atomic_cmpset_int(&ip->refs, refs, refs - 1))
319 				break;
320 		}
321 	}
322 }
323 
324 /*
325  * Get the vnode associated with the given inode, allocating the vnode if
326  * necessary.  The vnode will be returned exclusively locked.
327  *
328  * The caller must lock the inode (shared or exclusive).
329  *
330  * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim
331  * races.
332  */
333 struct vnode *
334 hammer2_igetv(hammer2_inode_t *ip, hammer2_cluster_t *cparent, int *errorp)
335 {
336 	const hammer2_inode_data_t *ipdata;
337 	hammer2_pfsmount_t *pmp;
338 	struct vnode *vp;
339 	ccms_state_t ostate;
340 
341 	pmp = ip->pmp;
342 	KKASSERT(pmp != NULL);
343 	*errorp = 0;
344 
345 	ipdata = &hammer2_cluster_data(cparent)->ipdata;
346 
347 	for (;;) {
348 		/*
349 		 * Attempt to reuse an existing vnode assignment.  It is
350 		 * possible to race a reclaim so the vget() may fail.  The
351 		 * inode must be unlocked during the vget() to avoid a
352 		 * deadlock against a reclaim.
353 		 */
354 		vp = ip->vp;
355 		if (vp) {
356 			/*
357 			 * Inode must be unlocked during the vget() to avoid
358 			 * possible deadlocks, but leave the ip ref intact.
359 			 *
360 			 * vnode is held to prevent destruction during the
361 			 * vget().  The vget() can still fail if we lost
362 			 * a reclaim race on the vnode.
363 			 */
364 			vhold(vp);
365 			ostate = hammer2_inode_lock_temp_release(ip);
366 			if (vget(vp, LK_EXCLUSIVE)) {
367 				vdrop(vp);
368 				hammer2_inode_lock_temp_restore(ip, ostate);
369 				continue;
370 			}
371 			hammer2_inode_lock_temp_restore(ip, ostate);
372 			vdrop(vp);
373 			/* vp still locked and ref from vget */
374 			if (ip->vp != vp) {
375 				kprintf("hammer2: igetv race %p/%p\n",
376 					ip->vp, vp);
377 				vput(vp);
378 				continue;
379 			}
380 			*errorp = 0;
381 			break;
382 		}
383 
384 		/*
385 		 * No vnode exists, allocate a new vnode.  Beware of
386 		 * allocation races.  This function will return an
387 		 * exclusively locked and referenced vnode.
388 		 */
389 		*errorp = getnewvnode(VT_HAMMER2, pmp->mp, &vp, 0, 0);
390 		if (*errorp) {
391 			kprintf("hammer2: igetv getnewvnode failed %d\n",
392 				*errorp);
393 			vp = NULL;
394 			break;
395 		}
396 
397 		/*
398 		 * Lock the inode and check for an allocation race.
399 		 */
400 		ostate = hammer2_inode_lock_upgrade(ip);
401 		if (ip->vp != NULL) {
402 			vp->v_type = VBAD;
403 			vx_put(vp);
404 			hammer2_inode_lock_downgrade(ip, ostate);
405 			continue;
406 		}
407 
408 		switch (ipdata->type) {
409 		case HAMMER2_OBJTYPE_DIRECTORY:
410 			vp->v_type = VDIR;
411 			break;
412 		case HAMMER2_OBJTYPE_REGFILE:
413 			vp->v_type = VREG;
414 			vinitvmio(vp, ipdata->size,
415 				  HAMMER2_LBUFSIZE,
416 				  (int)ipdata->size & HAMMER2_LBUFMASK);
417 			break;
418 		case HAMMER2_OBJTYPE_SOFTLINK:
419 			/*
420 			 * XXX for now we are using the generic file_read
421 			 * and file_write code so we need a buffer cache
422 			 * association.
423 			 */
424 			vp->v_type = VLNK;
425 			vinitvmio(vp, ipdata->size,
426 				  HAMMER2_LBUFSIZE,
427 				  (int)ipdata->size & HAMMER2_LBUFMASK);
428 			break;
429 		case HAMMER2_OBJTYPE_CDEV:
430 			vp->v_type = VCHR;
431 			/* fall through */
432 		case HAMMER2_OBJTYPE_BDEV:
433 			vp->v_ops = &pmp->mp->mnt_vn_spec_ops;
434 			if (ipdata->type != HAMMER2_OBJTYPE_CDEV)
435 				vp->v_type = VBLK;
436 			addaliasu(vp, ipdata->rmajor, ipdata->rminor);
437 			break;
438 		case HAMMER2_OBJTYPE_FIFO:
439 			vp->v_type = VFIFO;
440 			vp->v_ops = &pmp->mp->mnt_vn_fifo_ops;
441 			break;
442 		default:
443 			panic("hammer2: unhandled objtype %d", ipdata->type);
444 			break;
445 		}
446 
447 		if (ip == pmp->iroot)
448 			vsetflags(vp, VROOT);
449 
450 		vp->v_data = ip;
451 		ip->vp = vp;
452 		hammer2_inode_ref(ip);		/* vp association */
453 		hammer2_inode_lock_downgrade(ip, ostate);
454 		break;
455 	}
456 
457 	/*
458 	 * Return non-NULL vp and *errorp == 0, or NULL vp and *errorp != 0.
459 	 */
460 	if (hammer2_debug & 0x0002) {
461 		kprintf("igetv vp %p refs 0x%08x aux 0x%08x\n",
462 			vp, vp->v_refcnt, vp->v_auxrefs);
463 	}
464 	return (vp);
465 }
466 
467 /*
468  * Returns the inode associated with the passed-in cluster, creating the
469  * inode if necessary and synchronizing it to the passed-in cluster otherwise.
470  *
471  * The passed-in chain must be locked and will remain locked on return.
472  * The returned inode will be locked and the caller may dispose of both
473  * via hammer2_inode_unlock_ex().  However, if the caller needs to resolve
474  * a hardlink it must ref/unlock/relock/drop the inode.
475  *
476  * The hammer2_inode structure regulates the interface between the high level
477  * kernel VNOPS API and the filesystem backend (the chains).
478  */
479 hammer2_inode_t *
480 hammer2_inode_get(hammer2_pfsmount_t *pmp, hammer2_inode_t *dip,
481 		  hammer2_cluster_t *cluster)
482 {
483 	hammer2_inode_t *nip;
484 	const hammer2_inode_data_t *iptmp;
485 	const hammer2_inode_data_t *nipdata;
486 
487 	KKASSERT(hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE);
488 	KKASSERT(pmp);
489 
490 	/*
491 	 * Interlocked lookup/ref of the inode.  This code is only needed
492 	 * when looking up inodes with nlinks != 0 (TODO: optimize out
493 	 * otherwise and test for duplicates).
494 	 */
495 again:
496 	for (;;) {
497 		iptmp = &hammer2_cluster_data(cluster)->ipdata;
498 		nip = hammer2_inode_lookup(pmp, iptmp->inum);
499 		if (nip == NULL)
500 			break;
501 
502 		ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE);
503 
504 		/*
505 		 * Handle SMP race (not applicable to the super-root spmp
506 		 * which can't index inodes due to duplicative inode numbers).
507 		 */
508 		if (pmp->spmp_hmp == NULL &&
509 		    (nip->flags & HAMMER2_INODE_ONRBTREE) == 0) {
510 			ccms_thread_unlock(&nip->topo_cst);
511 			hammer2_inode_drop(nip);
512 			continue;
513 		}
514 		hammer2_inode_repoint(nip, NULL, cluster);
515 		return nip;
516 	}
517 
518 	/*
519 	 * We couldn't find the inode number, create a new inode.
520 	 */
521 	nip = kmalloc(sizeof(*nip), pmp->minode, M_WAITOK | M_ZERO);
522 	atomic_add_long(&pmp->inmem_inodes, 1);
523 	hammer2_pfs_memory_inc(pmp);
524 	hammer2_pfs_memory_wakeup(pmp);
525 	if (pmp->spmp_hmp)
526 		nip->flags = HAMMER2_INODE_SROOT;
527 
528 	/*
529 	 * Initialize nip's cluster
530 	 */
531 	nip->cluster.refs = 1;
532 	nip->cluster.pmp = pmp;
533 	nip->cluster.flags |= HAMMER2_CLUSTER_INODE;
534 	hammer2_cluster_replace(&nip->cluster, cluster);
535 
536 	nipdata = &hammer2_cluster_data(cluster)->ipdata;
537 	nip->inum = nipdata->inum;
538 	nip->size = nipdata->size;
539 	nip->mtime = nipdata->mtime;
540 	hammer2_inode_repoint(nip, NULL, cluster);
541 	nip->pip = dip;				/* can be NULL */
542 	if (dip)
543 		hammer2_inode_ref(dip);	/* ref dip for nip->pip */
544 
545 	nip->pmp = pmp;
546 
547 	/*
548 	 * ref and lock on nip gives it state compatible to after a
549 	 * hammer2_inode_lock_ex() call.
550 	 */
551 	nip->refs = 1;
552 	ccms_cst_init(&nip->topo_cst, &nip->cluster);
553 	ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE);
554 	/* combination of thread lock and chain lock == inode lock */
555 
556 	/*
557 	 * Attempt to add the inode.  If it fails we raced another inode
558 	 * get.  Undo all the work and try again.
559 	 */
560 	if (pmp->spmp_hmp == NULL) {
561 		spin_lock(&pmp->inum_spin);
562 		if (RB_INSERT(hammer2_inode_tree, &pmp->inum_tree, nip)) {
563 			spin_unlock(&pmp->inum_spin);
564 			ccms_thread_unlock(&nip->topo_cst);
565 			hammer2_inode_drop(nip);
566 			goto again;
567 		}
568 		atomic_set_int(&nip->flags, HAMMER2_INODE_ONRBTREE);
569 		spin_unlock(&pmp->inum_spin);
570 	}
571 
572 	return (nip);
573 }
574 
575 /*
576  * Create a new inode in the specified directory using the vattr to
577  * figure out the type of inode.
578  *
579  * If no error occurs the new inode with its cluster locked is returned in
580  * *nipp, otherwise an error is returned and *nipp is set to NULL.
581  *
582  * If vap and/or cred are NULL the related fields are not set and the
583  * inode type defaults to a directory.  This is used when creating PFSs
584  * under the super-root, so the inode number is set to 1 in this case.
585  *
586  * dip is not locked on entry.
587  *
588  * NOTE: When used to create a snapshot, the inode is temporarily associated
589  *	 with the super-root spmp. XXX should pass new pmp for snapshot.
590  */
591 hammer2_inode_t *
592 hammer2_inode_create(hammer2_trans_t *trans, hammer2_inode_t *dip,
593 		     struct vattr *vap, struct ucred *cred,
594 		     const uint8_t *name, size_t name_len,
595 		     hammer2_cluster_t **clusterp, int *errorp)
596 {
597 	const hammer2_inode_data_t *dipdata;
598 	hammer2_inode_data_t *nipdata;
599 	hammer2_cluster_t *cluster;
600 	hammer2_cluster_t *cparent;
601 	hammer2_inode_t *nip;
602 	hammer2_key_t key_dummy;
603 	hammer2_key_t lhc;
604 	int error;
605 	uid_t xuid;
606 	uuid_t dip_uid;
607 	uuid_t dip_gid;
608 	uint32_t dip_mode;
609 	uint8_t dip_comp_algo;
610 	uint8_t dip_check_algo;
611 	int ddflag;
612 
613 	lhc = hammer2_dirhash(name, name_len);
614 	*errorp = 0;
615 
616 	/*
617 	 * Locate the inode or indirect block to create the new
618 	 * entry in.  At the same time check for key collisions
619 	 * and iterate until we don't get one.
620 	 *
621 	 * NOTE: hidden inodes do not have iterators.
622 	 */
623 retry:
624 	cparent = hammer2_inode_lock_ex(dip);
625 	dipdata = &hammer2_cluster_data(cparent)->ipdata;
626 	dip_uid = dipdata->uid;
627 	dip_gid = dipdata->gid;
628 	dip_mode = dipdata->mode;
629 	dip_comp_algo = dipdata->comp_algo;
630 	dip_check_algo = dipdata->check_algo;
631 
632 	error = 0;
633 	while (error == 0) {
634 		cluster = hammer2_cluster_lookup(cparent, &key_dummy,
635 						 lhc, lhc, 0, &ddflag);
636 		if (cluster == NULL)
637 			break;
638 		if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0)
639 			error = ENOSPC;
640 		if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
641 			error = ENOSPC;
642 		hammer2_cluster_unlock(cluster);
643 		cluster = NULL;
644 		++lhc;
645 	}
646 
647 	if (error == 0) {
648 		error = hammer2_cluster_create(trans, cparent, &cluster,
649 					     lhc, 0,
650 					     HAMMER2_BREF_TYPE_INODE,
651 					     HAMMER2_INODE_BYTES,
652 					     0);
653 	}
654 #if INODE_DEBUG
655 	kprintf("CREATE INODE %*.*s chain=%p\n",
656 		(int)name_len, (int)name_len, name,
657 		(cluster ? cluster->focus : NULL));
658 #endif
659 
660 	/*
661 	 * Cleanup and handle retries.
662 	 */
663 	if (error == EAGAIN) {
664 		hammer2_cluster_ref(cparent);
665 		hammer2_inode_unlock_ex(dip, cparent);
666 		hammer2_cluster_wait(cparent);
667 		hammer2_cluster_drop(cparent);
668 		goto retry;
669 	}
670 	hammer2_inode_unlock_ex(dip, cparent);
671 	cparent = NULL;
672 
673 	if (error) {
674 		KKASSERT(cluster == NULL);
675 		*errorp = error;
676 		return (NULL);
677 	}
678 
679 	/*
680 	 * Set up the new inode.
681 	 *
682 	 * NOTE: *_get() integrates chain's lock into the inode lock.
683 	 *
684 	 * NOTE: Only one new inode can currently be created per
685 	 *	 transaction.  If the need arises we can adjust
686 	 *	 hammer2_trans_init() to allow more.
687 	 *
688 	 * NOTE: nipdata will have chain's blockset data.
689 	 */
690 	KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_MODIFIED);
691 	nipdata = &hammer2_cluster_wdata(cluster)->ipdata;
692 	nipdata->inum = trans->inode_tid;
693 	hammer2_cluster_modsync(cluster);
694 	nip = hammer2_inode_get(dip->pmp, dip, cluster);
695 	nipdata = &hammer2_cluster_wdata(cluster)->ipdata;
696 
697 	if (vap) {
698 		KKASSERT(trans->inodes_created == 0);
699 		nipdata->type = hammer2_get_obj_type(vap->va_type);
700 		nipdata->inum = trans->inode_tid;
701 		++trans->inodes_created;
702 
703 		switch (nipdata->type) {
704 		case HAMMER2_OBJTYPE_CDEV:
705 		case HAMMER2_OBJTYPE_BDEV:
706 			nipdata->rmajor = vap->va_rmajor;
707 			nipdata->rminor = vap->va_rminor;
708 			break;
709 		default:
710 			break;
711 		}
712 	} else {
713 		nipdata->type = HAMMER2_OBJTYPE_DIRECTORY;
714 		nipdata->inum = 1;
715 	}
716 
717 	/* Inherit parent's inode compression mode. */
718 	nip->comp_heuristic = 0;
719 	nipdata->comp_algo = dip_comp_algo;
720 	nipdata->check_algo = dip_check_algo;
721 	nipdata->version = HAMMER2_INODE_VERSION_ONE;
722 	hammer2_update_time(&nipdata->ctime);
723 	nipdata->mtime = nipdata->ctime;
724 	if (vap)
725 		nipdata->mode = vap->va_mode;
726 	nipdata->nlinks = 1;
727 	if (vap) {
728 		if (dip && dip->pmp) {
729 			xuid = hammer2_to_unix_xid(&dip_uid);
730 			xuid = vop_helper_create_uid(dip->pmp->mp,
731 						     dip_mode,
732 						     xuid,
733 						     cred,
734 						     &vap->va_mode);
735 		} else {
736 			/* super-root has no dip and/or pmp */
737 			xuid = 0;
738 		}
739 		if (vap->va_vaflags & VA_UID_UUID_VALID)
740 			nipdata->uid = vap->va_uid_uuid;
741 		else if (vap->va_uid != (uid_t)VNOVAL)
742 			hammer2_guid_to_uuid(&nipdata->uid, vap->va_uid);
743 		else
744 			hammer2_guid_to_uuid(&nipdata->uid, xuid);
745 
746 		if (vap->va_vaflags & VA_GID_UUID_VALID)
747 			nipdata->gid = vap->va_gid_uuid;
748 		else if (vap->va_gid != (gid_t)VNOVAL)
749 			hammer2_guid_to_uuid(&nipdata->gid, vap->va_gid);
750 		else if (dip)
751 			nipdata->gid = dip_gid;
752 	}
753 
754 	/*
755 	 * Regular files and softlinks allow a small amount of data to be
756 	 * directly embedded in the inode.  This flag will be cleared if
757 	 * the size is extended past the embedded limit.
758 	 */
759 	if (nipdata->type == HAMMER2_OBJTYPE_REGFILE ||
760 	    nipdata->type == HAMMER2_OBJTYPE_SOFTLINK) {
761 		nipdata->op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
762 	}
763 
764 	KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
765 	bcopy(name, nipdata->filename, name_len);
766 	nipdata->name_key = lhc;
767 	nipdata->name_len = name_len;
768 	hammer2_cluster_modsync(cluster);
769 	*clusterp = cluster;
770 
771 	return (nip);
772 }
773 
774 /*
775  * The cluster has been removed from the original directory and replaced
776  * with a hardlink pointer.  Move the cluster to the specified parent
777  * directory, change the filename to "0xINODENUMBER", and adjust the key.
778  * The cluster becomes our invisible hardlink target.
779  *
780  * The original cluster must be deleted on entry.
781  */
782 static
783 void
784 hammer2_hardlink_shiftup(hammer2_trans_t *trans, hammer2_cluster_t *cluster,
785 			hammer2_inode_t *dip, hammer2_cluster_t *dcluster,
786 			int nlinks, int *errorp)
787 {
788 	const hammer2_inode_data_t *iptmp;
789 	hammer2_inode_data_t *nipdata;
790 	hammer2_cluster_t *xcluster;
791 	hammer2_key_t key_dummy;
792 	hammer2_key_t lhc;
793 	hammer2_blockref_t bref;
794 	int ddflag;
795 
796 	iptmp = &hammer2_cluster_data(cluster)->ipdata;
797 	lhc = iptmp->inum;
798 	KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0);
799 
800 	/*
801 	 * Locate the inode or indirect block to create the new
802 	 * entry in.  lhc represents the inode number so there is
803 	 * no collision iteration.
804 	 *
805 	 * There should be no key collisions with invisible inode keys.
806 	 *
807 	 * WARNING! Must use inode_lock_ex() on dip to handle a stale
808 	 *	    dip->cluster cache.
809 	 */
810 	*errorp = 0;
811 	xcluster = hammer2_cluster_lookup(dcluster, &key_dummy,
812 				      lhc, lhc, 0, &ddflag);
813 	if (xcluster) {
814 		kprintf("X3 chain %p dip %p dchain %p dip->chain %p\n",
815 			xcluster->focus, dip, dcluster->focus,
816 			dip->cluster.focus);
817 		hammer2_cluster_unlock(xcluster);
818 		xcluster = NULL;
819 		*errorp = ENOSPC;
820 #if 0
821 		Debugger("X3");
822 #endif
823 	}
824 
825 	/*
826 	 * Handle the error case
827 	 */
828 	if (*errorp) {
829 		panic("error2");
830 		KKASSERT(xcluster == NULL);
831 		return;
832 	}
833 
834 	/*
835 	 * Use xcluster as a placeholder for (lhc).  Duplicate cluster to the
836 	 * same target bref as xcluster and then delete xcluster.  The
837 	 * duplication occurs after xcluster in flush order even though
838 	 * xcluster is deleted after the duplication. XXX
839 	 *
840 	 * WARNING! Duplications (to a different parent) can cause indirect
841 	 *	    blocks to be inserted, refactor xcluster.
842 	 *
843 	 * WARNING! Only key and keybits is extracted from a passed-in bref.
844 	 */
845 	hammer2_cluster_bref(cluster, &bref);
846 	bref.key = lhc;			/* invisible dir entry key */
847 	bref.keybits = 0;
848 	hammer2_cluster_rename(trans, &bref, dcluster, cluster, 0);
849 
850 	/*
851 	 * cluster is now 'live' again.. adjust the filename.
852 	 *
853 	 * Directory entries are inodes but this is a hidden hardlink
854 	 * target.  The name isn't used but to ease debugging give it
855 	 * a name after its inode number.
856 	 */
857 	hammer2_cluster_modify(trans, cluster, 0);
858 	nipdata = &hammer2_cluster_wdata(cluster)->ipdata;
859 	ksnprintf(nipdata->filename, sizeof(nipdata->filename),
860 		  "0x%016jx", (intmax_t)nipdata->inum);
861 	nipdata->name_len = strlen(nipdata->filename);
862 	nipdata->name_key = lhc;
863 	nipdata->nlinks += nlinks;
864 	hammer2_cluster_modsync(cluster);
865 }
866 
867 /*
868  * Connect the target inode represented by (cluster) to the media topology
869  * at (dip, name, len).  The caller can pass a rough *chainp, this function
870  * will issue lookup()s to position the parent chain properly for the
871  * chain insertion.
872  *
873  * If hlink is TRUE this function creates an OBJTYPE_HARDLINK directory
874  * entry instead of connecting (cluster).
875  *
876  * If hlink is FALSE this function expects (cluster) to be unparented.
877  */
878 int
879 hammer2_inode_connect(hammer2_trans_t *trans,
880 		      hammer2_cluster_t **clusterp, int hlink,
881 		      hammer2_inode_t *dip, hammer2_cluster_t *dcluster,
882 		      const uint8_t *name, size_t name_len,
883 		      hammer2_key_t lhc)
884 {
885 	hammer2_inode_data_t *wipdata;
886 	hammer2_cluster_t *ocluster;
887 	hammer2_cluster_t *ncluster;
888 	hammer2_key_t key_dummy;
889 	int ddflag;
890 	int error;
891 
892 	/*
893 	 * Since ocluster is either disconnected from the topology or
894 	 * represents a hardlink terminus which is always a parent of or
895 	 * equal to dip, we should be able to safely lock dip->chain for
896 	 * our setup.
897 	 *
898 	 * WARNING! Must use inode_lock_ex() on dip to handle a stale
899 	 *	    dip->cluster.
900 	 *
901 	 * If name is non-NULL we calculate lhc, else we use the passed-in
902 	 * lhc.
903 	 */
904 	ocluster = *clusterp;
905 
906 	if (name) {
907 		lhc = hammer2_dirhash(name, name_len);
908 
909 		/*
910 		 * Locate the inode or indirect block to create the new
911 		 * entry in.  At the same time check for key collisions
912 		 * and iterate until we don't get one.
913 		 */
914 		error = 0;
915 		while (error == 0) {
916 			ncluster = hammer2_cluster_lookup(dcluster, &key_dummy,
917 						      lhc, lhc,
918 						      0, &ddflag);
919 			if (ncluster == NULL)
920 				break;
921 			if ((lhc & HAMMER2_DIRHASH_LOMASK) ==
922 			    HAMMER2_DIRHASH_LOMASK) {
923 				error = ENOSPC;
924 			}
925 			hammer2_cluster_unlock(ncluster);
926 			ncluster = NULL;
927 			++lhc;
928 		}
929 	} else {
930 		/*
931 		 * Reconnect to specific key (used when moving
932 		 * unlinked-but-open files into the hidden directory).
933 		 */
934 		ncluster = hammer2_cluster_lookup(dcluster, &key_dummy,
935 						  lhc, lhc,
936 						  0, &ddflag);
937 		KKASSERT(ncluster == NULL);
938 	}
939 
940 	if (error == 0) {
941 		if (hlink) {
942 			/*
943 			 * Hardlink pointer needed, create totally fresh
944 			 * directory entry.
945 			 *
946 			 * We must refactor ocluster because it might have
947 			 * been shifted into an indirect cluster by the
948 			 * create.
949 			 */
950 			KKASSERT(ncluster == NULL);
951 			error = hammer2_cluster_create(trans,
952 						       dcluster, &ncluster,
953 						       lhc, 0,
954 						       HAMMER2_BREF_TYPE_INODE,
955 						       HAMMER2_INODE_BYTES,
956 						       0);
957 		} else {
958 			/*
959 			 * Reconnect the original cluster under the new name.
960 			 * Original cluster must have already been deleted by
961 			 * teh caller.
962 			 *
963 			 * WARNING! Can cause held-over clusters to require a
964 			 *	    refactor.  Fortunately we have none (our
965 			 *	    locked clusters are passed into and
966 			 *	    modified by the call).
967 			 */
968 			ncluster = ocluster;
969 			ocluster = NULL;
970 			error = hammer2_cluster_create(trans,
971 						       dcluster, &ncluster,
972 						       lhc, 0,
973 						       HAMMER2_BREF_TYPE_INODE,
974 						       HAMMER2_INODE_BYTES,
975 						       0);
976 		}
977 	}
978 
979 	/*
980 	 * Unlock stuff.
981 	 */
982 	KKASSERT(error != EAGAIN);
983 
984 	/*
985 	 * ncluster should be NULL on error, leave ocluster
986 	 * (ocluster == *clusterp) alone.
987 	 */
988 	if (error) {
989 		KKASSERT(ncluster == NULL);
990 		return (error);
991 	}
992 
993 	/*
994 	 * Directory entries are inodes so if the name has changed we have
995 	 * to update the inode.
996 	 *
997 	 * When creating an OBJTYPE_HARDLINK entry remember to unlock the
998 	 * cluster, the caller will access the hardlink via the actual hardlink
999 	 * target file and not the hardlink pointer entry, so we must still
1000 	 * return ocluster.
1001 	 */
1002 	if (hlink && hammer2_hardlink_enable >= 0) {
1003 		/*
1004 		 * Create the HARDLINK pointer.  oip represents the hardlink
1005 		 * target in this situation.
1006 		 *
1007 		 * We will return ocluster (the hardlink target).
1008 		 */
1009 		hammer2_cluster_modify(trans, ncluster, 0);
1010 		KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
1011 		wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1012 		bcopy(name, wipdata->filename, name_len);
1013 		wipdata->name_key = lhc;
1014 		wipdata->name_len = name_len;
1015 		wipdata->target_type =
1016 				hammer2_cluster_data(ocluster)->ipdata.type;
1017 		wipdata->type = HAMMER2_OBJTYPE_HARDLINK;
1018 		wipdata->inum = hammer2_cluster_data(ocluster)->ipdata.inum;
1019 		wipdata->version = HAMMER2_INODE_VERSION_ONE;
1020 		wipdata->nlinks = 1;
1021 		wipdata->op_flags = HAMMER2_OPFLAG_DIRECTDATA;
1022 		hammer2_cluster_modsync(ncluster);
1023 		hammer2_cluster_unlock(ncluster);
1024 		ncluster = ocluster;
1025 		ocluster = NULL;
1026 	} else {
1027 		/*
1028 		 * ncluster is a duplicate of ocluster at the new location.
1029 		 * We must fixup the name stored in the inode data.
1030 		 * The bref key has already been adjusted by inode_connect().
1031 		 */
1032 		hammer2_cluster_modify(trans, ncluster, 0);
1033 		wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1034 
1035 		KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
1036 		bcopy(name, wipdata->filename, name_len);
1037 		wipdata->name_key = lhc;
1038 		wipdata->name_len = name_len;
1039 		wipdata->nlinks = 1;
1040 		hammer2_cluster_modsync(ncluster);
1041 	}
1042 
1043 	/*
1044 	 * We are replacing ocluster with ncluster, unlock ocluster.  In the
1045 	 * case where ocluster is left unchanged the code above sets
1046 	 * ncluster to ocluster and ocluster to NULL, resulting in a NOP here.
1047 	 */
1048 	if (ocluster)
1049 		hammer2_cluster_unlock(ocluster);
1050 	*clusterp = ncluster;
1051 
1052 	return (0);
1053 }
1054 
1055 /*
1056  * Repoint ip->cluster's chains to cluster's chains.  Caller must hold
1057  * the inode exclusively locked.  cluster may be NULL to clean out any
1058  * chains in ip->cluster.
1059  */
1060 void
1061 hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
1062 		      hammer2_cluster_t *cluster)
1063 {
1064 	hammer2_chain_t *ochain;
1065 	hammer2_chain_t *nchain;
1066 	hammer2_inode_t *opip;
1067 	int i;
1068 
1069 	/*
1070 	 * Replace chains in ip->cluster with chains from cluster and
1071 	 * adjust the focus if necessary.
1072 	 *
1073 	 * NOTE: nchain and/or ochain can be NULL due to gaps
1074 	 *	 in the cluster arrays.
1075 	 */
1076 	ip->cluster.focus = NULL;
1077 	for (i = 0; cluster && i < cluster->nchains; ++i) {
1078 		nchain = cluster->array[i];
1079 		if (i < ip->cluster.nchains) {
1080 			ochain = ip->cluster.array[i];
1081 			if (ochain == nchain) {
1082 				if (ip->cluster.focus == NULL)
1083 					ip->cluster.focus = nchain;
1084 				continue;
1085 			}
1086 		} else {
1087 			ochain = NULL;
1088 		}
1089 
1090 		/*
1091 		 * Make adjustments
1092 		 */
1093 		ip->cluster.array[i] = nchain;
1094 		if (ip->cluster.focus == NULL)
1095 			ip->cluster.focus = nchain;
1096 		if (nchain)
1097 			hammer2_chain_ref(nchain);
1098 		if (ochain)
1099 			hammer2_chain_drop(ochain);
1100 	}
1101 
1102 	/*
1103 	 * Release any left-over chains in ip->cluster.
1104 	 */
1105 	while (i < ip->cluster.nchains) {
1106 		nchain = ip->cluster.array[i];
1107 		if (nchain) {
1108 			ip->cluster.array[i] = NULL;
1109 			hammer2_chain_drop(nchain);
1110 		}
1111 		++i;
1112 	}
1113 	ip->cluster.nchains = cluster ? cluster->nchains : 0;
1114 
1115 	/*
1116 	 * Repoint ip->pip if requested (non-NULL pip).
1117 	 */
1118 	if (pip && ip->pip != pip) {
1119 		opip = ip->pip;
1120 		hammer2_inode_ref(pip);
1121 		ip->pip = pip;
1122 		if (opip)
1123 			hammer2_inode_drop(opip);
1124 	}
1125 }
1126 
1127 /*
1128  * Unlink the file from the specified directory inode.  The directory inode
1129  * does not need to be locked.
1130  *
1131  * isdir determines whether a directory/non-directory check should be made.
1132  * No check is made if isdir is set to -1.
1133  *
1134  * isopen specifies whether special unlink-with-open-descriptor handling
1135  * must be performed.  If set to -1 the caller is deleting a PFS and we
1136  * check whether the chain is mounted or not (chain->pmp != NULL).  1 is
1137  * implied if it is mounted.
1138  *
1139  * If isopen is 1 and nlinks drops to 0 this function must move the chain
1140  * to a special hidden directory until last-close occurs on the file.
1141  *
1142  * NOTE!  The underlying file can still be active with open descriptors
1143  *	  or if the chain is being manually held (e.g. for rename).
1144  *
1145  *	  The caller is responsible for fixing up ip->chain if e.g. a
1146  *	  rename occurs (see chain_duplicate()).
1147  *
1148  * NOTE!  The chain is not deleted if it is moved to the hidden directory,
1149  *	  but otherwise will be deleted.
1150  */
1151 int
1152 hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
1153 		    const uint8_t *name, size_t name_len,
1154 		    int isdir, int *hlinkp, struct nchandle *nch,
1155 		    int nlinks)
1156 {
1157 	const hammer2_inode_data_t *ripdata;
1158 	hammer2_inode_data_t *wipdata;
1159 	hammer2_cluster_t *cparent;
1160 	hammer2_cluster_t *hcluster;
1161 	hammer2_cluster_t *hparent;
1162 	hammer2_cluster_t *cluster;
1163 	hammer2_cluster_t *dparent;
1164 	hammer2_cluster_t *dcluster;
1165 	hammer2_key_t key_dummy;
1166 	hammer2_key_t key_next;
1167 	hammer2_key_t lhc;
1168 	int error;
1169 	int ddflag;
1170 	int hlink;
1171 	uint8_t type;
1172 
1173 	error = 0;
1174 	hlink = 0;
1175 	hcluster = NULL;
1176 	hparent = NULL;
1177 	lhc = hammer2_dirhash(name, name_len);
1178 
1179 again:
1180 	/*
1181 	 * Search for the filename in the directory
1182 	 */
1183 	cparent = hammer2_inode_lock_ex(dip);
1184 	cluster = hammer2_cluster_lookup(cparent, &key_next,
1185 				     lhc, lhc + HAMMER2_DIRHASH_LOMASK,
1186 				     0, &ddflag);
1187 	while (cluster) {
1188 		if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) {
1189 			ripdata = &hammer2_cluster_data(cluster)->ipdata;
1190 			if (ripdata->name_len == name_len &&
1191 			    bcmp(ripdata->filename, name, name_len) == 0) {
1192 				break;
1193 			}
1194 		}
1195 		cluster = hammer2_cluster_next(cparent, cluster, &key_next,
1196 					       key_next,
1197 					       lhc + HAMMER2_DIRHASH_LOMASK,
1198 					       0);
1199 	}
1200 	hammer2_inode_unlock_ex(dip, NULL);	/* retain cparent */
1201 
1202 	/*
1203 	 * Not found or wrong type (isdir < 0 disables the type check).
1204 	 * If a hardlink pointer, type checks use the hardlink target.
1205 	 */
1206 	if (cluster == NULL) {
1207 		error = ENOENT;
1208 		goto done;
1209 	}
1210 	ripdata = &hammer2_cluster_data(cluster)->ipdata;
1211 	type = ripdata->type;
1212 	if (type == HAMMER2_OBJTYPE_HARDLINK) {
1213 		hlink = 1;
1214 		type = ripdata->target_type;
1215 	}
1216 
1217 	if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 0) {
1218 		error = ENOTDIR;
1219 		goto done;
1220 	}
1221 	if (type != HAMMER2_OBJTYPE_DIRECTORY && isdir >= 1) {
1222 		error = EISDIR;
1223 		goto done;
1224 	}
1225 
1226 	/*
1227 	 * Hardlink must be resolved.  We can't hold the parent locked
1228 	 * while we do this or we could deadlock.  The physical file will
1229 	 * be located at or above the current directory.
1230 	 *
1231 	 * We loop to reacquire the hardlink origination.
1232 	 *
1233 	 * NOTE: hammer2_hardlink_find() will locate the hardlink target,
1234 	 *	 returning a modified hparent and hcluster.
1235 	 */
1236 	if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK) {
1237 		if (hcluster == NULL) {
1238 			hcluster = cluster;
1239 			cluster = NULL;	/* safety */
1240 			hammer2_cluster_unlock(cparent);
1241 			cparent = NULL; /* safety */
1242 			ripdata = NULL;	/* safety (associated w/cparent) */
1243 			error = hammer2_hardlink_find(dip, &hparent, hcluster);
1244 
1245 			/*
1246 			 * If we couldn't find the hardlink target then some
1247 			 * parent directory containing the hardlink pointer
1248 			 * probably got renamed to above the original target,
1249 			 * a case not yet handled by H2.
1250 			 */
1251 			if (error) {
1252 				kprintf("H2 unlink_file: hardlink target for "
1253 					"\"%s\" not found\n",
1254 					name);
1255 				kprintf("(likely due to known directory "
1256 					"rename bug)\n");
1257 				goto done;
1258 			}
1259 			goto again;
1260 		}
1261 	}
1262 
1263 	/*
1264 	 * If this is a directory the directory must be empty.  However, if
1265 	 * isdir < 0 we are doing a rename and the directory does not have
1266 	 * to be empty, and if isdir > 1 we are deleting a PFS/snapshot
1267 	 * and the directory does not have to be empty.
1268 	 *
1269 	 * NOTE: We check the full key range here which covers both visible
1270 	 *	 and invisible entries.  Theoretically there should be no
1271 	 *	 invisible (hardlink target) entries if there are no visible
1272 	 *	 entries.
1273 	 */
1274 	if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 1) {
1275 		dparent = hammer2_cluster_lookup_init(cluster, 0);
1276 		dcluster = hammer2_cluster_lookup(dparent, &key_dummy,
1277 					          0, (hammer2_key_t)-1,
1278 					          HAMMER2_LOOKUP_NODATA,
1279 						  &ddflag);
1280 		if (dcluster) {
1281 			hammer2_cluster_unlock(dcluster);
1282 			hammer2_cluster_lookup_done(dparent);
1283 			error = ENOTEMPTY;
1284 			goto done;
1285 		}
1286 		hammer2_cluster_lookup_done(dparent);
1287 		dparent = NULL;
1288 		/* dcluster NULL */
1289 	}
1290 
1291 	/*
1292 	 * If this was a hardlink then (cparent, cluster) is the hardlink
1293 	 * pointer, which we can simply destroy outright.  Discard the
1294 	 * clusters and replace with the hardlink target.
1295 	 */
1296 	if (hcluster) {
1297 		hammer2_cluster_delete(trans, cparent, cluster,
1298 				       HAMMER2_DELETE_PERMANENT);
1299 		hammer2_cluster_unlock(cparent);
1300 		hammer2_cluster_unlock(cluster);
1301 		cparent = hparent;
1302 		cluster = hcluster;
1303 		hparent = NULL;
1304 		hcluster = NULL;
1305 	}
1306 
1307 	/*
1308 	 * This leaves us with the hardlink target or non-hardlinked file
1309 	 * or directory in (cparent, cluster).
1310 	 *
1311 	 * Delete the target when nlinks reaches 0 with special handling
1312 	 * if (isopen) is set.
1313 	 *
1314 	 * NOTE! In DragonFly the vnops function calls cache_unlink() after
1315 	 *	 calling us here to clean out the namecache association,
1316 	 *	 (which does not represent a ref for the open-test), and to
1317 	 *	 force finalization of the vnode if/when the last ref gets
1318 	 *	 dropped.
1319 	 *
1320 	 * NOTE! Files are unlinked by rename and then relinked.  nch will be
1321 	 *	 passed as NULL in this situation.  hammer2_inode_connect()
1322 	 *	 will bump nlinks.
1323 	 */
1324 	KKASSERT(cluster != NULL);
1325 	hammer2_cluster_modify(trans, cluster, 0);
1326 	wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1327 	ripdata = wipdata;
1328 	wipdata->nlinks += nlinks;
1329 	if ((int64_t)wipdata->nlinks < 0) {	/* XXX debugging */
1330 		wipdata->nlinks = 0;
1331 	}
1332 	hammer2_cluster_modsync(cluster);
1333 
1334 	if (wipdata->nlinks == 0) {
1335 		/*
1336 		 * Target nlinks has reached 0, file now unlinked (but may
1337 		 * still be open).
1338 		 */
1339 		/* XXX need interlock if mounted
1340 		if ((cluster->focus->flags & HAMMER2_CHAIN_PFSROOT) &&
1341 		    cluster->pmp) {
1342 			error = EINVAL;
1343 			kprintf("hammer2: PFS \"%s\" cannot be deleted "
1344 				"while still mounted\n",
1345 				wipdata->filename);
1346 			goto done;
1347 		}
1348 		*/
1349 		if (nch && cache_isopen(nch)) {
1350 			hammer2_inode_move_to_hidden(trans, &cparent, &cluster,
1351 						     wipdata->inum);
1352 		} else {
1353 			/*
1354 			 * This won't get everything if a vnode is still
1355 			 * present, but the cache_unlink() call the caller
1356 			 * makes will.
1357 			 */
1358 			hammer2_cluster_delete(trans, cparent, cluster,
1359 					       HAMMER2_DELETE_PERMANENT);
1360 		}
1361 	} else if (hlink == 0) {
1362 		/*
1363 		 * In this situation a normal non-hardlinked file (which can
1364 		 * only have nlinks == 1) still has a non-zero nlinks, the
1365 		 * caller must be doing a RENAME operation and so is passing
1366 		 * a nlinks adjustment of 0, and only wishes to remove file
1367 		 * in order to be able to reconnect it under a different name.
1368 		 *
1369 		 * In this situation we do a non-permanent deletion of the
1370 		 * chain in order to allow the file to be reconnected in
1371 		 * a different location.
1372 		 */
1373 		KKASSERT(nlinks == 0);
1374 		hammer2_cluster_delete(trans, cparent, cluster, 0);
1375 	}
1376 	error = 0;
1377 done:
1378 	if (cparent)
1379 		hammer2_cluster_unlock(cparent);
1380 	if (cluster)
1381 		hammer2_cluster_unlock(cluster);
1382 	if (hparent)
1383 		hammer2_cluster_unlock(hparent);
1384 	if (hcluster)
1385 		hammer2_cluster_unlock(hcluster);
1386 	if (hlinkp)
1387 		*hlinkp = hlink;
1388 
1389 	return error;
1390 }
1391 
1392 /*
1393  * This is called from the mount code to initialize pmp->ihidden
1394  */
1395 void
1396 hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp)
1397 {
1398 	hammer2_trans_t trans;
1399 	hammer2_cluster_t *cparent;
1400 	hammer2_cluster_t *cluster;
1401 	hammer2_cluster_t *scan;
1402 	const hammer2_inode_data_t *ripdata;
1403 	hammer2_inode_data_t *wipdata;
1404 	hammer2_key_t key_dummy;
1405 	hammer2_key_t key_next;
1406 	int ddflag;
1407 	int error;
1408 	int count;
1409 	int dip_check_algo;
1410 	int dip_comp_algo;
1411 
1412 	if (pmp->ihidden)
1413 		return;
1414 
1415 	/*
1416 	 * Find the hidden directory
1417 	 */
1418 	bzero(&key_dummy, sizeof(key_dummy));
1419 	hammer2_trans_init(&trans, pmp, 0);
1420 
1421 	/*
1422 	 * Setup for lookup, retrieve iroot's check and compression
1423 	 * algorithm request which was likely generated by newfs_hammer2.
1424 	 *
1425 	 * The check/comp fields will probably never be used since inodes
1426 	 * are renamed into the hidden directory and not created relative to
1427 	 * the hidden directory, chain creation inherits from bref.methods,
1428 	 * and data chains inherit from their respective file inode *_algo
1429 	 * fields.
1430 	 */
1431 	cparent = hammer2_inode_lock_ex(pmp->iroot);
1432 	ripdata = &hammer2_cluster_data(cparent)->ipdata;
1433 	dip_check_algo = ripdata->check_algo;
1434 	dip_comp_algo = ripdata->comp_algo;
1435 	ripdata = NULL;
1436 
1437 	cluster = hammer2_cluster_lookup(cparent, &key_dummy,
1438 					 HAMMER2_INODE_HIDDENDIR,
1439 					 HAMMER2_INODE_HIDDENDIR,
1440 					 0, &ddflag);
1441 	if (cluster) {
1442 		pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster);
1443 		hammer2_inode_ref(pmp->ihidden);
1444 
1445 		/*
1446 		 * Remove any unlinked files which were left open as-of
1447 		 * any system crash.
1448 		 *
1449 		 * Don't pass NODATA, we need the inode data so the delete
1450 		 * can do proper statistics updates.
1451 		 */
1452 		count = 0;
1453 		scan = hammer2_cluster_lookup(cluster, &key_next,
1454 					      0, HAMMER2_TID_MAX,
1455 					      0, &ddflag);
1456 		while (scan) {
1457 			if (hammer2_cluster_type(scan) ==
1458 			    HAMMER2_BREF_TYPE_INODE) {
1459 				hammer2_cluster_delete(&trans, cluster, scan,
1460 						   HAMMER2_DELETE_PERMANENT);
1461 				++count;
1462 			}
1463 			scan = hammer2_cluster_next(cluster, scan, &key_next,
1464 						    0, HAMMER2_TID_MAX, 0);
1465 		}
1466 
1467 		hammer2_inode_unlock_ex(pmp->ihidden, cluster);
1468 		hammer2_inode_unlock_ex(pmp->iroot, cparent);
1469 		hammer2_trans_done(&trans);
1470 		kprintf("hammer2: PFS loaded hidden dir, "
1471 			"removed %d dead entries\n", count);
1472 		return;
1473 	}
1474 
1475 	/*
1476 	 * Create the hidden directory
1477 	 */
1478 	error = hammer2_cluster_create(&trans, cparent, &cluster,
1479 				       HAMMER2_INODE_HIDDENDIR, 0,
1480 				       HAMMER2_BREF_TYPE_INODE,
1481 				       HAMMER2_INODE_BYTES,
1482 				       0);
1483 	hammer2_inode_unlock_ex(pmp->iroot, cparent);
1484 
1485 	hammer2_cluster_modify(&trans, cluster, 0);
1486 	wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1487 	wipdata->type = HAMMER2_OBJTYPE_DIRECTORY;
1488 	wipdata->inum = HAMMER2_INODE_HIDDENDIR;
1489 	wipdata->nlinks = 1;
1490 	wipdata->comp_algo = dip_comp_algo;
1491 	wipdata->check_algo = dip_check_algo;
1492 	hammer2_cluster_modsync(cluster);
1493 	kprintf("hammer2: PFS root missing hidden directory, creating\n");
1494 
1495 	pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster);
1496 	hammer2_inode_ref(pmp->ihidden);
1497 	hammer2_inode_unlock_ex(pmp->ihidden, cluster);
1498 	hammer2_trans_done(&trans);
1499 }
1500 
1501 /*
1502  * If an open file is unlinked H2 needs to retain the file in the topology
1503  * to ensure that its backing store is not recovered by the bulk free scan.
1504  * This also allows us to avoid having to special-case the CHAIN_DELETED flag.
1505  *
1506  * To do this the file is moved to a hidden directory in the PFS root and
1507  * renamed.  The hidden directory must be created if it does not exist.
1508  */
1509 static
1510 void
1511 hammer2_inode_move_to_hidden(hammer2_trans_t *trans,
1512 			     hammer2_cluster_t **cparentp,
1513 			     hammer2_cluster_t **clusterp,
1514 			     hammer2_tid_t inum)
1515 {
1516 	hammer2_cluster_t *dcluster;
1517 	hammer2_pfsmount_t *pmp;
1518 	int error;
1519 
1520 	pmp = (*clusterp)->pmp;
1521 	KKASSERT(pmp != NULL);
1522 	KKASSERT(pmp->ihidden != NULL);
1523 
1524 	hammer2_cluster_delete(trans, *cparentp, *clusterp, 0);
1525 	dcluster = hammer2_inode_lock_ex(pmp->ihidden);
1526 	error = hammer2_inode_connect(trans, clusterp, 0,
1527 				      pmp->ihidden, dcluster,
1528 				      NULL, 0, inum);
1529 	hammer2_inode_unlock_ex(pmp->ihidden, dcluster);
1530 	KKASSERT(error == 0);
1531 }
1532 
1533 /*
1534  * Given an exclusively locked inode and cluster we consolidate the cluster
1535  * for hardlink creation, adding (nlinks) to the file's link count and
1536  * potentially relocating the inode to (cdip) which is a parent directory
1537  * common to both the current location of the inode and the intended new
1538  * hardlink.
1539  *
1540  * Replaces (*clusterp) if consolidation occurred, unlocking the old cluster
1541  * and returning a new locked cluster.
1542  *
1543  * NOTE!  This function will also replace ip->cluster.
1544  */
1545 int
1546 hammer2_hardlink_consolidate(hammer2_trans_t *trans,
1547 			     hammer2_inode_t *ip,
1548 			     hammer2_cluster_t **clusterp,
1549 			     hammer2_inode_t *cdip,
1550 			     hammer2_cluster_t *cdcluster,
1551 			     int nlinks)
1552 {
1553 	const hammer2_inode_data_t *ripdata;
1554 	hammer2_inode_data_t *wipdata;
1555 	hammer2_cluster_t *cluster;
1556 	hammer2_cluster_t *cparent;
1557 	int error;
1558 
1559 	cluster = *clusterp;
1560 	ripdata = &hammer2_cluster_data(cluster)->ipdata;
1561 	if (nlinks == 0 &&			/* no hardlink needed */
1562 	    (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE)) {
1563 		return (0);
1564 	}
1565 
1566 	if (hammer2_hardlink_enable == 0) {	/* disallow hardlinks */
1567 		hammer2_cluster_unlock(cluster);
1568 		*clusterp = NULL;
1569 		return (ENOTSUP);
1570 	}
1571 
1572 	cparent = NULL;
1573 
1574 	/*
1575 	 * If no change in the hardlink's target directory is required and
1576 	 * this is already a hardlink target, all we need to do is adjust
1577 	 * the link count.
1578 	 */
1579 	ripdata = &hammer2_cluster_data(cluster)->ipdata;
1580 	if (cdip == ip->pip &&
1581 	    (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE) == 0) {
1582 		if (nlinks) {
1583 			hammer2_cluster_modify(trans, cluster, 0);
1584 			wipdata = &hammer2_cluster_wdata(cluster)->ipdata;
1585 			wipdata->nlinks += nlinks;
1586 			hammer2_cluster_modsync(cluster);
1587 			ripdata = wipdata;
1588 		}
1589 		error = 0;
1590 		goto done;
1591 	}
1592 
1593 	/*
1594 	 * Cluster is the real inode.  The originating directory is locked
1595 	 * by the caller so we can manipulate it without worrying about races
1596 	 * against other lookups.
1597 	 *
1598 	 * If cluster is visible we need to delete it from the current
1599 	 * location and create a hardlink pointer in its place.  If it is
1600 	 * not visible we need only delete it.  Then later cluster will be
1601 	 * renamed to a parent directory and converted (if necessary) to
1602 	 * a hidden inode (via shiftup).
1603 	 *
1604 	 * NOTE! We must hold cparent locked through the delete/create/rename
1605 	 *	 operation to ensure that other threads block resolving to
1606 	 *	 the same hardlink, otherwise the other threads may not see
1607 	 *	 the hardlink.
1608 	 */
1609 	KKASSERT((cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0);
1610 	cparent = hammer2_cluster_parent(cluster);
1611 
1612 	hammer2_cluster_delete(trans, cparent, cluster, 0);
1613 
1614 	ripdata = &hammer2_cluster_data(cluster)->ipdata;
1615 	KKASSERT(ripdata->type != HAMMER2_OBJTYPE_HARDLINK);
1616 	if (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE) {
1617 		hammer2_cluster_t *ncluster;
1618 		hammer2_key_t lhc;
1619 
1620 		ncluster = NULL;
1621 		lhc = cluster->focus->bref.key;
1622 		error = hammer2_cluster_create(trans, cparent, &ncluster,
1623 					     lhc, 0,
1624 					     HAMMER2_BREF_TYPE_INODE,
1625 					     HAMMER2_INODE_BYTES,
1626 					     0);
1627 		hammer2_cluster_modify(trans, ncluster, 0);
1628 		wipdata = &hammer2_cluster_wdata(ncluster)->ipdata;
1629 
1630 		/* wipdata->comp_algo = ripdata->comp_algo; */
1631 		wipdata->comp_algo = 0;
1632 		wipdata->check_algo = 0;
1633 		wipdata->version = HAMMER2_INODE_VERSION_ONE;
1634 		wipdata->inum = ripdata->inum;
1635 		wipdata->target_type = ripdata->type;
1636 		wipdata->type = HAMMER2_OBJTYPE_HARDLINK;
1637 		wipdata->uflags = 0;
1638 		wipdata->rmajor = 0;
1639 		wipdata->rminor = 0;
1640 		wipdata->ctime = 0;
1641 		wipdata->mtime = 0;
1642 		wipdata->atime = 0;
1643 		wipdata->btime = 0;
1644 		bzero(&wipdata->uid, sizeof(wipdata->uid));
1645 		bzero(&wipdata->gid, sizeof(wipdata->gid));
1646 		wipdata->op_flags = HAMMER2_OPFLAG_DIRECTDATA;
1647 		wipdata->cap_flags = 0;
1648 		wipdata->mode = 0;
1649 		wipdata->size = 0;
1650 		wipdata->nlinks = 1;
1651 		wipdata->iparent = 0;	/* XXX */
1652 		wipdata->pfs_type = 0;
1653 		wipdata->pfs_inum = 0;
1654 		bzero(&wipdata->pfs_clid, sizeof(wipdata->pfs_clid));
1655 		bzero(&wipdata->pfs_fsid, sizeof(wipdata->pfs_fsid));
1656 		wipdata->data_quota = 0;
1657 		wipdata->data_count = 0;
1658 		wipdata->inode_quota = 0;
1659 		wipdata->inode_count = 0;
1660 		wipdata->attr_tid = 0;
1661 		wipdata->dirent_tid = 0;
1662 		bzero(&wipdata->u, sizeof(wipdata->u));
1663 		bcopy(ripdata->filename, wipdata->filename, ripdata->name_len);
1664 		wipdata->name_key = ncluster->focus->bref.key;
1665 		wipdata->name_len = ripdata->name_len;
1666 		/* XXX transaction ids */
1667 		hammer2_cluster_modsync(ncluster);
1668 		hammer2_cluster_unlock(ncluster);
1669 	}
1670 	ripdata = wipdata;
1671 
1672 	/*
1673 	 * cluster represents the hardlink target and is now flagged deleted.
1674 	 * duplicate it to the parent directory and adjust nlinks.
1675 	 *
1676 	 * WARNING! The shiftup() call can cause ncluster to be moved into
1677 	 *	    an indirect block, and our ncluster will wind up pointing
1678 	 *	    to the older/original version.
1679 	 */
1680 	KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_DELETED);
1681 	hammer2_hardlink_shiftup(trans, cluster, cdip, cdcluster,
1682 				 nlinks, &error);
1683 
1684 	if (error == 0)
1685 		hammer2_inode_repoint(ip, cdip, cluster);
1686 
1687 done:
1688 	/*
1689 	 * Cleanup, cluster/ncluster already dealt with.
1690 	 *
1691 	 * Return the shifted cluster in *clusterp.
1692 	 */
1693 	if (cparent)
1694 		hammer2_cluster_unlock(cparent);
1695 	*clusterp = cluster;
1696 
1697 	return (error);
1698 }
1699 
1700 /*
1701  * If (*ochainp) is non-NULL it points to the forward OBJTYPE_HARDLINK
1702  * inode while (*chainp) points to the resolved (hidden hardlink
1703  * target) inode.  In this situation when nlinks is 1 we wish to
1704  * deconsolidate the hardlink, moving it back to the directory that now
1705  * represents the only remaining link.
1706  */
1707 int
1708 hammer2_hardlink_deconsolidate(hammer2_trans_t *trans,
1709 			       hammer2_inode_t *dip,
1710 			       hammer2_chain_t **chainp,
1711 			       hammer2_chain_t **ochainp)
1712 {
1713 	if (*ochainp == NULL)
1714 		return (0);
1715 	/* XXX */
1716 	return (0);
1717 }
1718 
1719 /*
1720  * The caller presents a locked cluster with an obj_type of
1721  * HAMMER2_OBJTYPE_HARDLINK.  This routine will locate and replace the
1722  * cluster with the target hardlink, also locked.
1723  *
1724  * If cparentp is not NULL a locked cluster representing the hardlink's
1725  * parent is also returned.
1726  *
1727  * If we are unable to locate the hardlink target EIO is returned and
1728  * (*cparentp) is set to NULL.  The passed-in cluster still needs to be
1729  * unlocked by the caller but will be degenerate... not have any chains.
1730  */
1731 int
1732 hammer2_hardlink_find(hammer2_inode_t *dip,
1733 		      hammer2_cluster_t **cparentp, hammer2_cluster_t *cluster)
1734 {
1735 	const hammer2_inode_data_t *ipdata;
1736 	hammer2_cluster_t *cparent;
1737 	hammer2_cluster_t *rcluster;
1738 	hammer2_inode_t *ip;
1739 	hammer2_inode_t *pip;
1740 	hammer2_key_t key_dummy;
1741 	hammer2_key_t lhc;
1742 	int ddflag;
1743 
1744 	pip = dip;
1745 	hammer2_inode_ref(pip);		/* for loop */
1746 
1747 	/*
1748 	 * Locate the hardlink.  pip is referenced and not locked.
1749 	 */
1750 	ipdata = &hammer2_cluster_data(cluster)->ipdata;
1751 	lhc = ipdata->inum;
1752 
1753 	/*
1754 	 * We don't need the cluster's chains, but we need to retain the
1755 	 * cluster structure itself so we can load the hardlink search
1756 	 * result into it.
1757 	 */
1758 	KKASSERT(cluster->refs == 1);
1759 	atomic_add_int(&cluster->refs, 1);
1760 	hammer2_cluster_unlock(cluster);	/* hack */
1761 	cluster->nchains = 0;			/* hack */
1762 
1763 	rcluster = NULL;
1764 	cparent = NULL;
1765 
1766 	while ((ip = pip) != NULL) {
1767 		cparent = hammer2_inode_lock_ex(ip);
1768 		hammer2_inode_drop(ip);			/* loop */
1769 		KKASSERT(hammer2_cluster_type(cparent) ==
1770 			 HAMMER2_BREF_TYPE_INODE);
1771 		rcluster = hammer2_cluster_lookup(cparent, &key_dummy,
1772 					     lhc, lhc, 0, &ddflag);
1773 		if (rcluster)
1774 			break;
1775 		hammer2_cluster_lookup_done(cparent);	/* discard parent */
1776 		cparent = NULL;				/* safety */
1777 		pip = ip->pip;		/* safe, ip held locked */
1778 		if (pip)
1779 			hammer2_inode_ref(pip);		/* loop */
1780 		hammer2_inode_unlock_ex(ip, NULL);
1781 	}
1782 
1783 	/*
1784 	 * chain is locked, ip is locked.  Unlock ip, return the locked
1785 	 * chain.  *ipp is already set w/a ref count and not locked.
1786 	 *
1787 	 * (cparent is already unlocked).
1788 	 */
1789 	if (rcluster) {
1790 		hammer2_cluster_replace(cluster, rcluster);
1791 		hammer2_cluster_drop(rcluster);
1792 		if (cparentp) {
1793 			*cparentp = cparent;
1794 			hammer2_inode_unlock_ex(ip, NULL);
1795 		} else {
1796 			hammer2_inode_unlock_ex(ip, cparent);
1797 		}
1798 		return (0);
1799 	} else {
1800 		if (cparentp)
1801 			*cparentp = NULL;
1802 		if (ip)
1803 			hammer2_inode_unlock_ex(ip, cparent);
1804 		return (EIO);
1805 	}
1806 }
1807 
1808 /*
1809  * Find the directory common to both fdip and tdip.
1810  *
1811  * Returns a held but not locked inode.  Caller typically locks the inode,
1812  * and when through unlocks AND drops it.
1813  */
1814 hammer2_inode_t *
1815 hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip)
1816 {
1817 	hammer2_inode_t *scan1;
1818 	hammer2_inode_t *scan2;
1819 
1820 	/*
1821 	 * We used to have a depth field but it complicated matters too
1822 	 * much for directory renames.  So now its ugly.  Check for
1823 	 * simple cases before giving up and doing it the expensive way.
1824 	 *
1825 	 * XXX need a bottom-up topology stability lock
1826 	 */
1827 	if (fdip == tdip || fdip == tdip->pip) {
1828 		hammer2_inode_ref(fdip);
1829 		return(fdip);
1830 	}
1831 	if (fdip->pip == tdip) {
1832 		hammer2_inode_ref(tdip);
1833 		return(tdip);
1834 	}
1835 
1836 	/*
1837 	 * XXX not MPSAFE
1838 	 */
1839 	for (scan1 = fdip; scan1->pmp == fdip->pmp; scan1 = scan1->pip) {
1840 		scan2 = tdip;
1841 		while (scan2->pmp == tdip->pmp) {
1842 			if (scan1 == scan2) {
1843 				hammer2_inode_ref(scan1);
1844 				return(scan1);
1845 			}
1846 			scan2 = scan2->pip;
1847 			if (scan2 == NULL)
1848 				break;
1849 		}
1850 	}
1851 	panic("hammer2_inode_common_parent: no common parent %p %p\n",
1852 	      fdip, tdip);
1853 	/* NOT REACHED */
1854 	return(NULL);
1855 }
1856 
1857 /*
1858  * Synchronize the inode's frontend state with the chain state prior
1859  * to any explicit flush of the inode or any strategy write call.
1860  *
1861  * Called with a locked inode.
1862  */
1863 void
1864 hammer2_inode_fsync(hammer2_trans_t *trans, hammer2_inode_t *ip,
1865 		    hammer2_cluster_t *cparent)
1866 {
1867 	const hammer2_inode_data_t *ripdata;
1868 	hammer2_inode_data_t *wipdata;
1869 	hammer2_cluster_t *dparent;
1870 	hammer2_cluster_t *cluster;
1871 	hammer2_key_t lbase;
1872 	hammer2_key_t key_next;
1873 	int dosync = 0;
1874 	int ddflag;
1875 
1876 	ripdata = &hammer2_cluster_data(cparent)->ipdata;    /* target file */
1877 
1878 	if (ip->flags & HAMMER2_INODE_MTIME) {
1879 		wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
1880 		atomic_clear_int(&ip->flags, HAMMER2_INODE_MTIME);
1881 		wipdata->mtime = ip->mtime;
1882 		dosync = 1;
1883 		ripdata = wipdata;
1884 	}
1885 	if ((ip->flags & HAMMER2_INODE_RESIZED) && ip->size < ripdata->size) {
1886 		wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
1887 		wipdata->size = ip->size;
1888 		dosync = 1;
1889 		ripdata = wipdata;
1890 		atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED);
1891 
1892 		/*
1893 		 * We must delete any chains beyond the EOF.  The chain
1894 		 * straddling the EOF will be pending in the bioq.
1895 		 */
1896 		lbase = (ripdata->size + HAMMER2_PBUFMASK64) &
1897 			~HAMMER2_PBUFMASK64;
1898 		dparent = hammer2_cluster_lookup_init(&ip->cluster, 0);
1899 		cluster = hammer2_cluster_lookup(dparent, &key_next,
1900 					         lbase, (hammer2_key_t)-1,
1901 						 HAMMER2_LOOKUP_NODATA,
1902 						 &ddflag);
1903 		while (cluster) {
1904 			/*
1905 			 * Degenerate embedded case, nothing to loop on
1906 			 */
1907 			switch (hammer2_cluster_type(cluster)) {
1908 			case HAMMER2_BREF_TYPE_INODE:
1909 				hammer2_cluster_unlock(cluster);
1910 				cluster = NULL;
1911 				break;
1912 			case HAMMER2_BREF_TYPE_DATA:
1913 				hammer2_cluster_delete(trans, dparent, cluster,
1914 						   HAMMER2_DELETE_PERMANENT);
1915 				/* fall through */
1916 			default:
1917 				cluster = hammer2_cluster_next(dparent, cluster,
1918 						   &key_next,
1919 						   key_next, (hammer2_key_t)-1,
1920 						   HAMMER2_LOOKUP_NODATA);
1921 				break;
1922 			}
1923 		}
1924 		hammer2_cluster_lookup_done(dparent);
1925 	} else
1926 	if ((ip->flags & HAMMER2_INODE_RESIZED) && ip->size > ripdata->size) {
1927 		wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0);
1928 		wipdata->size = ip->size;
1929 		atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED);
1930 
1931 		/*
1932 		 * When resizing larger we may not have any direct-data
1933 		 * available.
1934 		 */
1935 		if ((wipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA) &&
1936 		    ip->size > HAMMER2_EMBEDDED_BYTES) {
1937 			wipdata->op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA;
1938 			bzero(&wipdata->u.blockset,
1939 			      sizeof(wipdata->u.blockset));
1940 		}
1941 		dosync = 1;
1942 		ripdata = wipdata;
1943 	}
1944 	if (dosync)
1945 		hammer2_cluster_modsync(cparent);
1946 }
1947