xref: /dragonfly/sys/vfs/hammer2/hammer2_inode.c (revision 3170ffd7)
1 /*
2  * Copyright (c) 2011-2013 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 #include <sys/cdefs.h>
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/types.h>
39 #include <sys/lock.h>
40 #include <sys/uuid.h>
41 
42 #include "hammer2.h"
43 
/*
 * Instantiate the red-black tree support code for hammer2_inode_tree.
 * Tree nodes are hammer2_inode structures linked through their rbnode
 * field and ordered by hammer2_inode_cmp().  The trailing
 * (hammer2_tid_t, inum) arguments name the key type and key field;
 * presumably this is what lets RB_LOOKUP() search the tree directly by
 * inode number (see hammer2_inode_lookup()) -- confirm against sys/tree.h.
 */
RB_GENERATE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp,
	     hammer2_tid_t, inum);
46 
47 int
48 hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2)
49 {
50 	if (ip1->inum < ip2->inum)
51 		return(-1);
52 	if (ip1->inum > ip2->inum)
53 		return(1);
54 	return(0);
55 }
56 
57 hammer2_inode_t *
58 hammer2_inode_lookup(hammer2_pfsmount_t *pmp, hammer2_tid_t inum)
59 {
60 	hammer2_inode_t *ip;
61 
62 	if (pmp) {
63 		spin_lock(&pmp->inum_spin);
64 		ip = RB_LOOKUP(hammer2_inode_tree, &pmp->inum_tree, inum);
65 		if (ip)
66 			hammer2_inode_ref(ip);
67 		spin_unlock(&pmp->inum_spin);
68 	} else {
69 		ip = NULL;
70 	}
71 	return(ip);
72 }
73 
/*
 * Adding a ref to an inode is only legal if the inode already has at least
 * one ref.
 *
 * The count is bumped with a single atomic add; no lock is taken here.
 * hammer2_inode_lookup() calls this while holding pmp->inum_spin, the same
 * spinlock hammer2_inode_drop() takes around its 1->0 transition.
 */
void
hammer2_inode_ref(hammer2_inode_t *ip)
{
	atomic_add_int(&ip->refs, 1);
}
83 
/*
 * Drop an inode reference, freeing the inode when the last reference goes
 * away.
 *
 * The count is lowered with compare-and-set, so the loop simply retries
 * when it loses a race.  When the last reference is released the inode is
 * removed from the inode-number tree (if it is on it), its chain reference
 * is dropped, the structure is freed, and the loop continues with the
 * parent inode (ip->pip) in order to release the reference the freed inode
 * held on it.
 */
void
hammer2_inode_drop(hammer2_inode_t *ip)
{
	hammer2_mount_t *hmp;
	hammer2_inode_t *pip;
	hammer2_chain_t *chain;
	u_int refs;

	while (ip) {
		/*
		 * Sample the count once; the cmpset calls below only
		 * succeed if the count still has the value tested here.
		 */
		refs = ip->refs;
		cpu_ccfence();
		if (refs == 1) {
			/*
			 * Transition to zero, must interlock with
			 * the inode inumber lookup tree (if applicable).
			 *
			 * NOTE: The super-root inode has no pmp.
			 */
			if (ip->pmp)
				spin_lock(&ip->pmp->inum_spin);

			if (atomic_cmpset_int(&ip->refs, 1, 0)) {
				/*
				 * We own the final teardown.  The inode
				 * must not be locked at this point.
				 */
				KKASSERT(ip->topo_cst.count == 0);
				if (ip->flags & HAMMER2_INODE_ONRBTREE) {
					atomic_clear_int(&ip->flags,
						     HAMMER2_INODE_ONRBTREE);
					RB_REMOVE(hammer2_inode_tree,
						  &ip->pmp->inum_tree,
						  ip);
				}
				if (ip->pmp)
					spin_unlock(&ip->pmp->inum_spin);

				/*
				 * Detach everything the inode points at
				 * before freeing it.  hmp is kept in a
				 * local because its minode malloc type is
				 * needed by kfree().
				 */
				hmp = ip->hmp;
				ip->hmp = NULL;
				pip = ip->pip;
				ip->pip = NULL;
				chain = ip->chain;
				ip->chain = NULL;
				if (chain)
					hammer2_chain_drop(chain);

				/*
				 * We have to drop pip (if non-NULL) to
				 * dispose of our implied reference from
				 * ip->pip.  We can simply loop on it.
				 */
				kfree(ip, hmp->minode);
				ip = pip;
				/* continue with pip (can be NULL) */
			} else {
				/*
				 * Lost a race (the count is no longer 1),
				 * release the interlock and retry.
				 */
				if (ip->pmp)
					spin_unlock(&ip->pmp->inum_spin);
			}
		} else {
			/*
			 * Non zero transition
			 */
			if (atomic_cmpset_int(&ip->refs, refs, refs - 1))
				break;
		}
	}
}
151 
152 /*
153  * Get the vnode associated with the given inode, allocating the vnode if
154  * necessary.  The vnode will be returned exclusively locked.
155  *
156  * The caller must lock the inode (shared or exclusive).
157  *
158  * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim
159  * races.
160  */
161 struct vnode *
162 hammer2_igetv(hammer2_inode_t *ip, int *errorp)
163 {
164 	hammer2_inode_data_t *ipdata;
165 	hammer2_pfsmount_t *pmp;
166 	struct vnode *vp;
167 	ccms_state_t ostate;
168 
169 	pmp = ip->pmp;
170 	KKASSERT(pmp != NULL);
171 	*errorp = 0;
172 	ipdata = &ip->chain->data->ipdata;
173 
174 	for (;;) {
175 		/*
176 		 * Attempt to reuse an existing vnode assignment.  It is
177 		 * possible to race a reclaim so the vget() may fail.  The
178 		 * inode must be unlocked during the vget() to avoid a
179 		 * deadlock against a reclaim.
180 		 */
181 		vp = ip->vp;
182 		if (vp) {
183 			/*
184 			 * Inode must be unlocked during the vget() to avoid
185 			 * possible deadlocks, but leave the ip ref intact.
186 			 *
187 			 * vnode is held to prevent destruction during the
188 			 * vget().  The vget() can still fail if we lost
189 			 * a reclaim race on the vnode.
190 			 */
191 			vhold_interlocked(vp);
192 			ostate = hammer2_inode_lock_temp_release(ip);
193 			if (vget(vp, LK_EXCLUSIVE)) {
194 				vdrop(vp);
195 				hammer2_inode_lock_temp_restore(ip, ostate);
196 				continue;
197 			}
198 			hammer2_inode_lock_temp_restore(ip, ostate);
199 			vdrop(vp);
200 			/* vp still locked and ref from vget */
201 			if (ip->vp != vp) {
202 				kprintf("hammer2: igetv race %p/%p\n",
203 					ip->vp, vp);
204 				vput(vp);
205 				continue;
206 			}
207 			*errorp = 0;
208 			break;
209 		}
210 
211 		/*
212 		 * No vnode exists, allocate a new vnode.  Beware of
213 		 * allocation races.  This function will return an
214 		 * exclusively locked and referenced vnode.
215 		 */
216 		*errorp = getnewvnode(VT_HAMMER2, pmp->mp, &vp, 0, 0);
217 		if (*errorp) {
218 			kprintf("hammer2: igetv getnewvnode failed %d\n",
219 				*errorp);
220 			vp = NULL;
221 			break;
222 		}
223 
224 		/*
225 		 * Lock the inode and check for an allocation race.
226 		 */
227 		ostate = hammer2_inode_lock_upgrade(ip);
228 		if (ip->vp != NULL) {
229 			vp->v_type = VBAD;
230 			vx_put(vp);
231 			hammer2_inode_lock_downgrade(ip, ostate);
232 			continue;
233 		}
234 
235 		switch (ipdata->type) {
236 		case HAMMER2_OBJTYPE_DIRECTORY:
237 			vp->v_type = VDIR;
238 			break;
239 		case HAMMER2_OBJTYPE_REGFILE:
240 			vp->v_type = VREG;
241 			vinitvmio(vp, ipdata->size,
242 				  HAMMER2_LBUFSIZE,
243 				  (int)ipdata->size & HAMMER2_LBUFMASK);
244 			break;
245 		case HAMMER2_OBJTYPE_SOFTLINK:
246 			/*
247 			 * XXX for now we are using the generic file_read
248 			 * and file_write code so we need a buffer cache
249 			 * association.
250 			 */
251 			vp->v_type = VLNK;
252 			vinitvmio(vp, ipdata->size,
253 				  HAMMER2_LBUFSIZE,
254 				  (int)ipdata->size & HAMMER2_LBUFMASK);
255 			break;
256 		/* XXX FIFO */
257 		default:
258 			panic("hammer2: unhandled objtype %d", ipdata->type);
259 			break;
260 		}
261 
262 		if (ip == pmp->iroot)
263 			vsetflags(vp, VROOT);
264 
265 		vp->v_data = ip;
266 		ip->vp = vp;
267 		hammer2_inode_ref(ip);		/* vp association */
268 		hammer2_inode_lock_downgrade(ip, ostate);
269 		break;
270 	}
271 
272 	/*
273 	 * Return non-NULL vp and *errorp == 0, or NULL vp and *errorp != 0.
274 	 */
275 	if (hammer2_debug & 0x0002) {
276 		kprintf("igetv vp %p refs %d aux %d\n",
277 			vp, vp->v_sysref.refcnt, vp->v_auxrefs);
278 	}
279 	return (vp);
280 }
281 
282 /*
283  * The passed-in chain must be locked and the returned inode will also be
284  * locked.  A ref is added to both the chain and the inode.  The chain lock
285  * is inherited by the inode structure and should not be separately released.
286  *
287  * The hammer2_inode structure regulates the interface between the high level
288  * kernel VNOPS API and the filesystem backend (the chains).
289  *
290  * WARNING!  This routine sucks up the chain's lock (makes it part of the
291  *	     inode lock from the point of view of the inode lock API),
292  *	     so callers need to be careful.
293  *
294  * WARNING!  The mount code is allowed to pass dip == NULL for iroot and
295  *	     is allowed to pass pmp == NULL and dip == NULL for sroot.
296  */
297 hammer2_inode_t *
298 hammer2_inode_get(hammer2_mount_t *hmp, hammer2_pfsmount_t *pmp,
299 		  hammer2_inode_t *dip, hammer2_chain_t *chain)
300 {
301 	hammer2_inode_t *nip;
302 	hammer2_chain_t *ochain;
303 
304 	KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
305 
306 	/*
307 	 * Interlocked lookup/ref of the inode.  This code is only needed
308 	 * when looking up inodes with nlinks != 0 (TODO: optimize out
309 	 * otherwise and test for duplicates).
310 	 */
311 again:
312 	for (;;) {
313 		nip = hammer2_inode_lookup(pmp, chain->data->ipdata.inum);
314 		if (nip == NULL)
315 			break;
316 		ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE);
317 		if ((nip->flags & HAMMER2_INODE_ONRBTREE) == 0) {
318 			ccms_thread_unlock(&nip->topo_cst);
319 			hammer2_inode_drop(nip);
320 			continue;
321 		}
322 		if (nip->chain != chain) {
323 			hammer2_chain_ref(chain);	/* new nip->chain */
324 			ochain = nip->chain;
325 			nip->chain = chain;		/* fully locked   */
326 			hammer2_chain_drop(ochain);	/* old nip->chain */
327 		}
328 		/*
329 		 * Consolidated nip/nip->chain is locked (chain locked
330 		 * by caller).
331 		 */
332 		return nip;
333 	}
334 
335 	/*
336 	 * We couldn't find the inode number, create a new inode.
337 	 */
338 	nip = kmalloc(sizeof(*nip), hmp->minode, M_WAITOK | M_ZERO);
339 	nip->inum = chain->data->ipdata.inum;
340 	nip->chain = chain;
341 	hammer2_chain_ref(chain);		/* nip->chain */
342 	nip->pip = dip;				/* can be NULL */
343 	if (dip)
344 		hammer2_inode_ref(dip);	/* ref dip for nip->pip */
345 
346 	nip->pmp = pmp;
347 	nip->hmp = hmp;
348 
349 	/*
350 	 * ref and lock on nip gives it state compatible to after a
351 	 * hammer2_inode_lock_ex() call.
352 	 */
353 	nip->refs = 1;
354 	ccms_cst_init(&nip->topo_cst, &nip->chain);
355 	ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE);
356 	/* combination of thread lock and chain lock == inode lock */
357 
358 	/*
359 	 * Attempt to add the inode.  If it fails we raced another inode
360 	 * get.  Undo all the work and try again.
361 	 */
362 	if (pmp) {
363 		spin_lock(&pmp->inum_spin);
364 		if (RB_INSERT(hammer2_inode_tree, &pmp->inum_tree, nip)) {
365 			spin_unlock(&pmp->inum_spin);
366 			ccms_thread_unlock(&nip->topo_cst);
367 			hammer2_inode_drop(nip);
368 			goto again;
369 		}
370 		atomic_set_int(&nip->flags, HAMMER2_INODE_ONRBTREE);
371 		spin_unlock(&pmp->inum_spin);
372 	}
373 
374 	return (nip);
375 }
376 
377 /*
378  * Put away an inode, unlocking it and disconnecting it from its chain.
379  *
380  * The inode must be exclusively locked on call and non-recursed, with
381  * at least 2 refs (one belonging to the exclusive lock, and one additional
382  * ref belonging to the caller).
383  *
384  * Upon return the inode typically has one ref remaining which the caller
385  * drops.
386  */
387 void
388 hammer2_inode_put(hammer2_inode_t *ip)
389 {
390 	hammer2_inode_t *pip;
391 	hammer2_chain_t *chain;
392 
393 	/*
394 	 * Disconnect and unlock chain
395 	 */
396 	KKASSERT(ip->refs >= 2);
397 	KKASSERT(ip->topo_cst.count == -1);	/* one excl lock allowed */
398 	if ((chain = ip->chain) != NULL) {
399 		ip->chain = NULL;
400 		hammer2_inode_unlock_ex(ip);
401 		hammer2_chain_unlock(chain);	/* because ip->chain now NULL */
402 		hammer2_chain_drop(chain);	/* from *_get() */
403 	}
404 
405 	/*
406 	 * Disconnect pip
407 	 */
408 	if ((pip = ip->pip) != NULL) {
409 		ip->pip = NULL;
410 		hammer2_inode_drop(pip);
411 	}
412 }
413 
414 /*
415  * Create a new inode in the specified directory using the vattr to
416  * figure out the type of inode.
417  *
418  * If no error occurs the new inode with its chain locked is returned in
419  * *nipp, otherwise an error is returned and *nipp is set to NULL.
420  *
421  * If vap and/or cred are NULL the related fields are not set and the
422  * inode type defaults to a directory.  This is used when creating PFSs
423  * under the super-root, so the inode number is set to 1 in this case.
424  *
425  * dip is not locked on entry.
426  */
427 hammer2_inode_t *
428 hammer2_inode_create(hammer2_trans_t *trans, hammer2_inode_t *dip,
429 		     struct vattr *vap, struct ucred *cred,
430 		     const uint8_t *name, size_t name_len,
431 		     int *errorp)
432 {
433 	hammer2_inode_data_t *dipdata;
434 	hammer2_inode_data_t *nipdata;
435 	hammer2_mount_t *hmp;
436 	hammer2_chain_t *chain;
437 	hammer2_chain_t *parent;
438 	hammer2_inode_t *nip;
439 	hammer2_key_t lhc;
440 	int error;
441 	uid_t xuid;
442 	uuid_t dip_uid;
443 	uuid_t dip_gid;
444 	uint32_t dip_mode;
445 
446 	hmp = dip->hmp;
447 	lhc = hammer2_dirhash(name, name_len);
448 	*errorp = 0;
449 
450 	/*
451 	 * Locate the inode or indirect block to create the new
452 	 * entry in.  At the same time check for key collisions
453 	 * and iterate until we don't get one.
454 	 *
455 	 * NOTE: hidden inodes do not have iterators.
456 	 */
457 retry:
458 	hammer2_inode_lock_ex(dip);
459 	dipdata = &dip->chain->data->ipdata;
460 	dip_uid = dipdata->uid;
461 	dip_gid = dipdata->gid;
462 	dip_mode = dipdata->mode;
463 
464 	parent = hammer2_chain_lookup_init(dip->chain, 0);
465 	error = 0;
466 	while (error == 0) {
467 		chain = hammer2_chain_lookup(&parent, lhc, lhc, 0);
468 		if (chain == NULL)
469 			break;
470 		if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0)
471 			error = ENOSPC;
472 		if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
473 			error = ENOSPC;
474 		hammer2_chain_unlock(chain);
475 		chain = NULL;
476 		++lhc;
477 	}
478 	if (error == 0) {
479 		error = hammer2_chain_create(trans, &parent, &chain,
480 					     lhc, 0,
481 					     HAMMER2_BREF_TYPE_INODE,
482 					     HAMMER2_INODE_BYTES);
483 	}
484 
485 	/*
486 	 * Cleanup and handle retries.
487 	 */
488 	if (error == EAGAIN) {
489 		hammer2_chain_ref(parent);
490 		hammer2_chain_lookup_done(parent);
491 		hammer2_inode_unlock_ex(dip);
492 		hammer2_chain_wait(parent);
493 		hammer2_chain_drop(parent);
494 		goto retry;
495 	}
496 	hammer2_chain_lookup_done(parent);
497 	hammer2_inode_unlock_ex(dip);
498 
499 	if (error) {
500 		KKASSERT(chain == NULL);
501 		*errorp = error;
502 		return (NULL);
503 	}
504 
505 	/*
506 	 * Set up the new inode.
507 	 *
508 	 * NOTE: *_get() integrates chain's lock into the inode lock.
509 	 *
510 	 * NOTE: Only one new inode can currently be created per
511 	 *	 transaction.  If the need arises we can adjust
512 	 *	 hammer2_trans_init() to allow more.
513 	 */
514 	chain->data->ipdata.inum = trans->sync_tid;
515 	nip = hammer2_inode_get(dip->hmp, dip->pmp, dip, chain);
516 	nipdata = &chain->data->ipdata;
517 
518 	if (vap) {
519 		KKASSERT(trans->inodes_created == 0);
520 		nipdata->type = hammer2_get_obj_type(vap->va_type);
521 		nipdata->inum = trans->sync_tid;
522 		++trans->inodes_created;
523 	} else {
524 		nipdata->type = HAMMER2_OBJTYPE_DIRECTORY;
525 		nipdata->inum = 1;
526 	}
527 	nipdata->version = HAMMER2_INODE_VERSION_ONE;
528 	hammer2_update_time(&nipdata->ctime);
529 	nipdata->mtime = nipdata->ctime;
530 	if (vap)
531 		nipdata->mode = vap->va_mode;
532 	nipdata->nlinks = 1;
533 	if (vap) {
534 		if (dip) {
535 			xuid = hammer2_to_unix_xid(&dip_uid);
536 			xuid = vop_helper_create_uid(dip->pmp->mp,
537 						     dip_mode,
538 						     xuid,
539 						     cred,
540 						     &vap->va_mode);
541 		} else {
542 			xuid = 0;
543 		}
544 		if (vap->va_vaflags & VA_UID_UUID_VALID)
545 			nipdata->uid = vap->va_uid_uuid;
546 		else if (vap->va_uid != (uid_t)VNOVAL)
547 			hammer2_guid_to_uuid(&nipdata->uid, vap->va_uid);
548 		else
549 			hammer2_guid_to_uuid(&nipdata->uid, xuid);
550 
551 		if (vap->va_vaflags & VA_GID_UUID_VALID)
552 			nipdata->gid = vap->va_gid_uuid;
553 		else if (vap->va_gid != (gid_t)VNOVAL)
554 			hammer2_guid_to_uuid(&nipdata->gid, vap->va_gid);
555 		else if (dip)
556 			nipdata->gid = dip_gid;
557 	}
558 
559 	/*
560 	 * Regular files and softlinks allow a small amount of data to be
561 	 * directly embedded in the inode.  This flag will be cleared if
562 	 * the size is extended past the embedded limit.
563 	 */
564 	if (nipdata->type == HAMMER2_OBJTYPE_REGFILE ||
565 	    nipdata->type == HAMMER2_OBJTYPE_SOFTLINK) {
566 		nipdata->op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
567 	}
568 
569 	KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
570 	bcopy(name, nipdata->filename, name_len);
571 	nipdata->name_key = lhc;
572 	nipdata->name_len = name_len;
573 
574 	return (nip);
575 }
576 
577 /*
578  * chain may have been moved around by the create.
579  */
580 static
581 void
582 hammer2_chain_refactor(hammer2_chain_t **chainp)
583 {
584 	hammer2_chain_t *chain = *chainp;
585 	hammer2_chain_t *tmp;
586 
587 	while (chain->duplink && (chain->flags & HAMMER2_CHAIN_DELETED)) {
588 		tmp = chain->duplink;
589 		while (tmp->duplink && (tmp->flags & HAMMER2_CHAIN_DELETED))
590 			tmp = tmp->duplink;
591 		hammer2_chain_ref(chain);
592 		hammer2_chain_unlock(chain);
593 		hammer2_chain_lock(tmp, HAMMER2_RESOLVE_ALWAYS);
594 		hammer2_chain_drop(chain);
595 		chain = tmp;
596 		*chainp = chain;
597 	}
598 }
599 
600 
601 /*
602  * ochain represents the target file inode.  We need to move it to the
603  * specified common parent directory (dip) and rename it to a special
604  * invisible "0xINODENUMBER" filename.
605  *
606  * We use chain_duplicate and duplicate ochain at the new location,
607  * renaming it appropriately.  We create a temporary chain and
608  * then delete it to placemark where the duplicate will go.  Both of
609  * these use the inode number for (lhc) (the key), generating the
610  * invisible filename.
611  */
612 static
613 hammer2_chain_t *
614 hammer2_hardlink_shiftup(hammer2_trans_t *trans, hammer2_chain_t **ochainp,
615 			hammer2_inode_t *dip, int *errorp)
616 {
617 	hammer2_inode_data_t *nipdata;
618 	hammer2_mount_t *hmp;
619 	hammer2_chain_t *parent;
620 	hammer2_chain_t *ochain;
621 	hammer2_chain_t *nchain;
622 	hammer2_chain_t *tmp;
623 	hammer2_key_t lhc;
624 	hammer2_blockref_t bref;
625 
626 	ochain = *ochainp;
627 	*errorp = 0;
628 	hmp = dip->hmp;
629 	lhc = ochain->data->ipdata.inum;
630 	KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0);
631 
632 	/*
633 	 * Locate the inode or indirect block to create the new
634 	 * entry in.  lhc represents the inode number so there is
635 	 * no collision iteration.
636 	 *
637 	 * There should be no key collisions with invisible inode keys.
638 	 */
639 retry:
640 	parent = hammer2_chain_lookup_init(dip->chain, 0);
641 	nchain = hammer2_chain_lookup(&parent, lhc, lhc, 0);
642 	if (nchain) {
643 		kprintf("X3 chain %p parent %p dip %p dip->chain %p\n",
644 			nchain, parent, dip, dip->chain);
645 		hammer2_chain_unlock(nchain);
646 		nchain = NULL;
647 		*errorp = ENOSPC;
648 #if 0
649 		Debugger("X3");
650 #endif
651 	}
652 
653 	/*
654 	 * Create entry in common parent directory using the seek position
655 	 * calculated above.
656 	 */
657 	if (*errorp == 0) {
658 		KKASSERT(nchain == NULL);
659 		*errorp = hammer2_chain_create(trans, &parent, &nchain,
660 					       lhc, 0,
661 					       HAMMER2_BREF_TYPE_INODE,/* n/a */
662 					       HAMMER2_INODE_BYTES);   /* n/a */
663 		hammer2_chain_refactor(&ochain);
664 		*ochainp = ochain;
665 	}
666 
667 	/*
668 	 * Cleanup and handle retries.
669 	 */
670 	if (*errorp == EAGAIN) {
671 		hammer2_chain_ref(parent);
672 		hammer2_chain_lookup_done(parent);
673 		hammer2_chain_wait(parent);
674 		hammer2_chain_drop(parent);
675 		goto retry;
676 	}
677 
678 	/*
679 	 * Handle the error case
680 	 */
681 	if (*errorp) {
682 		KKASSERT(nchain == NULL);
683 		hammer2_chain_lookup_done(parent);
684 		return (NULL);
685 	}
686 
687 	/*
688 	 * Use chain as a placeholder for (lhc), delete it and replace
689 	 * it with our duplication.
690 	 *
691 	 * Gain a second lock on ochain for the duplication function to
692 	 * unlock, maintain the caller's original lock across the call.
693 	 *
694 	 * This is a bit messy.
695 	 */
696 	hammer2_chain_delete(trans, parent, nchain);
697 	hammer2_chain_lock(ochain, HAMMER2_RESOLVE_ALWAYS);
698 	tmp = ochain;
699 	bref = tmp->bref;
700 	bref.key = lhc;			/* invisible dir entry key */
701 	bref.keybits = 0;
702 	hammer2_chain_duplicate(trans, parent, nchain->index, &tmp, &bref);
703 	hammer2_chain_lookup_done(parent);
704 	hammer2_chain_unlock(nchain);	/* no longer needed */
705 
706 	/*
707 	 * Now set chain to our duplicate and modify it appropriately.
708 	 *
709 	 * Directory entries are inodes but this is a hidden hardlink
710 	 * target.  The name isn't used but to ease debugging give it
711 	 * a name after its inode number.
712 	 */
713 	nchain = tmp;
714 	tmp = NULL;	/* safety */
715 
716 	hammer2_chain_modify(trans, &nchain, HAMMER2_MODIFY_ASSERTNOCOPY);
717 	nipdata = &nchain->data->ipdata;
718 	ksnprintf(nipdata->filename, sizeof(nipdata->filename),
719 		  "0x%016jx", (intmax_t)nipdata->inum);
720 	nipdata->name_len = strlen(nipdata->filename);
721 	nipdata->name_key = lhc;
722 
723 	return (nchain);
724 }
725 
726 /*
727  * Connect the target inode represented by (*chainp) to the media topology
728  * at (dip, name, len).
729  *
730  * If hlink is TRUE this function creates an OBJTYPE_HARDLINK directory
731  * entry instead of connecting (*chainp).
732  *
733  * If hlink is FALSE this function uses chain_duplicate() to make a copy
734  * if (*chainp) in the directory entry.  (*chainp) is likely to be deleted
735  * by the caller in this case (e.g. rename).
736  */
737 int
738 hammer2_inode_connect(hammer2_trans_t *trans, int hlink,
739 		      hammer2_inode_t *dip, hammer2_chain_t **chainp,
740 		      const uint8_t *name, size_t name_len)
741 {
742 	hammer2_inode_data_t *ipdata;
743 	hammer2_mount_t *hmp;
744 	hammer2_chain_t *nchain;
745 	hammer2_chain_t *parent;
746 	hammer2_chain_t *ochain;
747 	hammer2_key_t lhc;
748 	int error;
749 
750 	hmp = dip->hmp;
751 
752 	ochain = *chainp;
753 
754 	/*
755 	 * Since ochain is either disconnected from the topology or represents
756 	 * a hardlink terminus which is always a parent of or equal to dip,
757 	 * we should be able to safely lock dip->chain for our setup.
758 	 */
759 	parent = hammer2_chain_lookup_init(dip->chain, 0);
760 
761 	lhc = hammer2_dirhash(name, name_len);
762 
763 	/*
764 	 * Locate the inode or indirect block to create the new
765 	 * entry in.  At the same time check for key collisions
766 	 * and iterate until we don't get one.
767 	 */
768 	error = 0;
769 	while (error == 0) {
770 		nchain = hammer2_chain_lookup(&parent, lhc, lhc, 0);
771 		if (nchain == NULL)
772 			break;
773 		if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
774 			error = ENOSPC;
775 		hammer2_chain_unlock(nchain);
776 		nchain = NULL;
777 		++lhc;
778 	}
779 
780 	if (error == 0) {
781 		if (hlink) {
782 			/*
783 			 * Hardlink pointer needed, create totally fresh
784 			 * directory entry.
785 			 */
786 			KKASSERT(nchain == NULL);
787 			error = hammer2_chain_create(trans, &parent, &nchain,
788 						     lhc, 0,
789 						     HAMMER2_BREF_TYPE_INODE,
790 						     HAMMER2_INODE_BYTES);
791 			hammer2_chain_refactor(&ochain);
792 		} else {
793 			/*
794 			 * Reconnect the original chain and rename.  Use
795 			 * chain_duplicate().  The caller will likely delete
796 			 * or has already deleted the original chain in
797 			 * this case.
798 			 */
799 			nchain = ochain;
800 			ochain = NULL;
801 			hammer2_chain_duplicate(trans, NULL, -1, &nchain, NULL);
802 			error = hammer2_chain_create(trans, &parent, &nchain,
803 						     lhc, 0,
804 						     HAMMER2_BREF_TYPE_INODE,
805 						     HAMMER2_INODE_BYTES);
806 		}
807 	}
808 
809 	/*
810 	 * Unlock stuff.
811 	 */
812 	KKASSERT(error != EAGAIN);
813 	hammer2_chain_lookup_done(parent);
814 	parent = NULL;
815 
816 	/*
817 	 * nchain should be NULL on error, leave ochain (== *chainp) alone.
818 	 */
819 	if (error) {
820 		KKASSERT(nchain == NULL);
821 		return (error);
822 	}
823 
824 	/*
825 	 * Directory entries are inodes so if the name has changed we have
826 	 * to update the inode.
827 	 *
828 	 * When creating an OBJTYPE_HARDLINK entry remember to unlock the
829 	 * chain, the caller will access the hardlink via the actual hardlink
830 	 * target file and not the hardlink pointer entry, so we must still
831 	 * return ochain.
832 	 */
833 	if (hlink && hammer2_hardlink_enable >= 0) {
834 		/*
835 		 * Create the HARDLINK pointer.  oip represents the hardlink
836 		 * target in this situation.
837 		 *
838 		 * We will return ochain (the hardlink target).
839 		 */
840 		hammer2_chain_modify(trans, &nchain,
841 				     HAMMER2_MODIFY_ASSERTNOCOPY);
842 		KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
843 		ipdata = &nchain->data->ipdata;
844 		bcopy(name, ipdata->filename, name_len);
845 		ipdata->name_key = lhc;
846 		ipdata->name_len = name_len;
847 		ipdata->target_type = ochain->data->ipdata.type;
848 		ipdata->type = HAMMER2_OBJTYPE_HARDLINK;
849 		ipdata->inum = ochain->data->ipdata.inum;
850 		ipdata->nlinks = 1;
851 		hammer2_chain_unlock(nchain);
852 		nchain = ochain;
853 		ochain = NULL;
854 	} else if (hlink && hammer2_hardlink_enable < 0) {
855 		/*
856 		 * Create a snapshot (hardlink fake mode for debugging).
857 		 * (ochain already flushed above so we can just copy the
858 		 * bref XXX).
859 		 *
860 		 * Since this is a snapshot we return nchain in the fake
861 		 * hardlink case.
862 		 */
863 		hammer2_chain_modify(trans, &nchain,
864 				     HAMMER2_MODIFY_ASSERTNOCOPY);
865 		KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
866 		ipdata = &nchain->data->ipdata;
867 		*ipdata = ochain->data->ipdata;
868 		bcopy(name, ipdata->filename, name_len);
869 		ipdata->name_key = lhc;
870 		ipdata->name_len = name_len;
871 		kprintf("created fake hardlink %*.*s\n",
872 			(int)name_len, (int)name_len, name);
873 	} else {
874 		/*
875 		 * nchain is a duplicate of ochain at the new location.
876 		 * We must fixup the name stored in oip.  The bref key
877 		 * has already been set up.
878 		 */
879 		hammer2_chain_modify(trans, &nchain,
880 				     HAMMER2_MODIFY_ASSERTNOCOPY);
881 		ipdata = &nchain->data->ipdata;
882 
883 		KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
884 		bcopy(name, ipdata->filename, name_len);
885 		ipdata->name_key = lhc;
886 		ipdata->name_len = name_len;
887 		ipdata->nlinks = 1;
888 	}
889 
890 	/*
891 	 * We are replacing ochain with nchain, unlock ochain.  In the
892 	 * case where ochain is left unchanged the code above sets
893 	 * nchain to ochain and ochain to NULL, resulting in a NOP here.
894 	 */
895 	if (ochain)
896 		hammer2_chain_unlock(ochain);
897 	*chainp = nchain;
898 
899 	return (0);
900 }
901 
902 /*
903  * Caller must hold exactly ONE exclusive lock on the inode.  *nchainp
904  * must be exclusive locked (its own exclusive lock even if it is the
905  * same as ip->chain).
906  *
907  * This function replaces ip->chain.  The exclusive lock on the passed
908  * nchain is inherited by the inode and the caller becomes responsible
909  * for unlocking it when the caller unlocks the inode.
910  *
911  * ochain was locked by the caller indirectly via the inode lock.  Since
912  * ip->chain is being repointed, we become responsible for cleaning up
913  * that lock.
914  *
915  * Return *nchainp = NULL as a safety.
916  */
917 void
918 hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
919 		      hammer2_chain_t *nchain)
920 {
921 	hammer2_chain_t *ochain;
922 	hammer2_inode_t *opip;
923 
924 	/*
925 	 * ip->chain points to the hardlink target, not the hardlink psuedo
926 	 * inode.  Do not repoint nchain to the pseudo-node.
927 	 */
928 	if (nchain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK)
929 		return;
930 
931 	/*
932 	 * Repoint ip->chain if necessary.
933 	 *
934 	 * (Inode must be locked exclusively by parent)
935 	 */
936 	ochain = ip->chain;
937 	if (ochain != nchain) {
938 		hammer2_chain_ref(nchain);		/* for ip->chain */
939 		ip->chain = nchain;
940 		if (ochain) {
941 			hammer2_chain_unlock(ochain);
942 			hammer2_chain_drop(ochain);	/* for ip->chain */
943 		}
944 		/* replace locked chain in ip (additional lock) */
945 		hammer2_chain_lock(nchain, HAMMER2_RESOLVE_ALWAYS);
946 	}
947 	if (ip->pip != pip) {
948 		opip = ip->pip;
949 		if (pip)
950 			hammer2_inode_ref(pip);
951 		ip->pip = pip;
952 		if (opip)
953 			hammer2_inode_drop(opip);
954 	}
955 }
956 
957 /*
958  * Unlink the file from the specified directory inode.  The directory inode
959  * does not need to be locked.  The caller should pass a non-NULL (ip)
960  * representing the object being removed only if the related vnode is
961  * potentially inactive (not referenced in the caller's active path),
962  * so we can vref/vrele it to trigger the VOP_INACTIVE path and properly
963  * recycle it.
964  *
965  * isdir determines whether a directory/non-directory check should be made.
966  * No check is made if isdir is set to -1.
967  *
968  * NOTE!  This function does not prevent the underlying file from still
969  *	  being used if it has other refs (such as from an inode, or if it's
970  *	  chain is manually held).  However, the caller is responsible for
971  *	  fixing up ip->chain if e.g. a rename occurs (see chain_duplicate()).
972  */
973 int
974 hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip,
975 		    const uint8_t *name, size_t name_len,
976 		    int isdir, int *hlinkp)
977 {
978 	hammer2_inode_data_t *ipdata;
979 	hammer2_mount_t *hmp;
980 	hammer2_chain_t *parent;
981 	hammer2_chain_t *ochain;
982 	hammer2_chain_t *chain;
983 	hammer2_chain_t *dparent;
984 	hammer2_chain_t *dchain;
985 	hammer2_key_t lhc;
986 	int error;
987 	int parent_ref;
988 	uint8_t type;
989 
990 	parent_ref = 0;
991 	error = 0;
992 	ochain = NULL;
993 	hmp = dip->hmp;
994 	lhc = hammer2_dirhash(name, name_len);
995 
996 	/*
997 	 * Search for the filename in the directory
998 	 */
999 	if (hlinkp)
1000 		*hlinkp = 0;
1001 	hammer2_inode_lock_ex(dip);
1002 
1003 	parent = hammer2_chain_lookup_init(dip->chain, 0);
1004 	chain = hammer2_chain_lookup(&parent,
1005 				     lhc, lhc + HAMMER2_DIRHASH_LOMASK,
1006 				     0);
1007 	while (chain) {
1008 		if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
1009 		    name_len == chain->data->ipdata.name_len &&
1010 		    bcmp(name, chain->data->ipdata.filename, name_len) == 0) {
1011 			break;
1012 		}
1013 		chain = hammer2_chain_next(&parent, chain,
1014 					   lhc, lhc + HAMMER2_DIRHASH_LOMASK,
1015 					   0);
1016 	}
1017 	hammer2_inode_unlock_ex(dip);	/* retain parent */
1018 
1019 	/*
1020 	 * Not found or wrong type (isdir < 0 disables the type check).
1021 	 * If a hardlink pointer, type checks use the hardlink target.
1022 	 */
1023 	if (chain == NULL) {
1024 		error = ENOENT;
1025 		goto done;
1026 	}
1027 	if ((type = chain->data->ipdata.type) == HAMMER2_OBJTYPE_HARDLINK) {
1028 		if (hlinkp)
1029 			*hlinkp = 1;
1030 		type = chain->data->ipdata.target_type;
1031 	}
1032 
1033 	if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 0) {
1034 		error = ENOTDIR;
1035 		goto done;
1036 	}
1037 	if (type != HAMMER2_OBJTYPE_DIRECTORY && isdir == 1) {
1038 		error = EISDIR;
1039 		goto done;
1040 	}
1041 
1042 	/*
1043 	 * Hardlink must be resolved.  We can't hold parent locked while we
1044 	 * do this or we could deadlock.
1045 	 *
1046 	 * On success chain will be adjusted to point at the hardlink target
1047 	 * and ochain will point to the hardlink pointer in the original
1048 	 * directory.  Otherwise chain remains pointing to the original.
1049 	 */
1050 	if (chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) {
1051 		KKASSERT(parent_ref == 0);
1052 		hammer2_chain_unlock(parent);
1053 		parent = NULL;
1054 		error = hammer2_hardlink_find(dip, &chain, &ochain);
1055 	}
1056 
1057 	/*
1058 	 * If this is a directory the directory must be empty.  However, if
1059 	 * isdir < 0 we are doing a rename and the directory does not have
1060 	 * to be empty.
1061 	 *
1062 	 * NOTE: We check the full key range here which covers both visible
1063 	 *	 and invisible entries.  Theoretically there should be no
1064 	 *	 invisible (hardlink target) entries if there are no visible
1065 	 *	 entries.
1066 	 */
1067 	if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir >= 0) {
1068 		dparent = hammer2_chain_lookup_init(chain, 0);
1069 		dchain = hammer2_chain_lookup(&dparent,
1070 					      0, (hammer2_key_t)-1,
1071 					      HAMMER2_LOOKUP_NODATA);
1072 		if (dchain) {
1073 			hammer2_chain_unlock(dchain);
1074 			hammer2_chain_lookup_done(dparent);
1075 			error = ENOTEMPTY;
1076 			goto done;
1077 		}
1078 		hammer2_chain_lookup_done(dparent);
1079 		dparent = NULL;
1080 		/* dchain NULL */
1081 	}
1082 
1083 	/*
1084 	 * Ok, we can now unlink the chain.  We always decrement nlinks even
1085 	 * if the entry can be deleted in case someone has the file open and
1086 	 * does an fstat().
1087 	 *
1088 	 * The chain itself will no longer be in the on-media topology but
1089 	 * can still be flushed to the media (e.g. if an open descriptor
1090 	 * remains).  When the last vnode/ip ref goes away the chain will
1091 	 * be marked unmodified, avoiding any further (now unnecesary) I/O.
1092 	 *
1093 	 * A non-NULL ochain indicates a hardlink.
1094 	 */
1095 	if (ochain) {
1096 		/*
1097 		 * Delete the original hardlink pointer.
1098 		 *
1099 		 * NOTE: parent from above is NULL when ochain != NULL
1100 		 *	 so we can reuse it.
1101 		 */
1102 		hammer2_chain_lock(ochain, HAMMER2_RESOLVE_ALWAYS);
1103 		parent_ref = 1;
1104 		for (;;) {
1105 			parent = ochain->parent;
1106 			hammer2_chain_ref(parent);
1107 			hammer2_chain_unlock(ochain);
1108 			hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
1109 			hammer2_chain_lock(ochain, HAMMER2_RESOLVE_ALWAYS);
1110 			if (ochain->parent == parent)
1111 				break;
1112 			hammer2_chain_unlock(parent);
1113 			hammer2_chain_drop(parent);
1114 		}
1115 
1116 		hammer2_chain_delete(trans, parent, ochain);
1117 		hammer2_chain_unlock(ochain);
1118 		hammer2_chain_unlock(parent);
1119 		hammer2_chain_drop(parent);
1120 		parent = NULL;
1121 
1122 		/*
1123 		 * Then decrement nlinks on hardlink target, deleting
1124 		 * the target when nlinks drops to 0.
1125 		 */
1126 		if (chain->data->ipdata.nlinks == 1) {
1127 			dparent = chain->parent;
1128 			hammer2_chain_ref(chain);
1129 			hammer2_chain_unlock(chain);
1130 			hammer2_chain_lock(dparent, HAMMER2_RESOLVE_ALWAYS);
1131 			hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
1132 			hammer2_chain_drop(chain);
1133 			hammer2_chain_modify(trans, &chain, 0);
1134 			--chain->data->ipdata.nlinks;
1135 			hammer2_chain_delete(trans, dparent, chain);
1136 			hammer2_chain_unlock(dparent);
1137 		} else {
1138 			hammer2_chain_modify(trans, &chain, 0);
1139 			--chain->data->ipdata.nlinks;
1140 		}
1141 	} else {
1142 		/*
1143 		 * Otherwise this was not a hardlink and we can just
1144 		 * remove the entry and decrement nlinks.
1145 		 *
1146 		 * NOTE: *_get() integrates chain's lock into the inode lock.
1147 		 */
1148 		hammer2_chain_modify(trans, &chain, 0);
1149 		ipdata = &chain->data->ipdata;
1150 		--ipdata->nlinks;
1151 		hammer2_chain_delete(trans, parent, chain);
1152 	}
1153 
1154 	error = 0;
1155 done:
1156 	if (chain)
1157 		hammer2_chain_unlock(chain);
1158 	if (parent) {
1159 		hammer2_chain_lookup_done(parent);
1160 		if (parent_ref)
1161 			hammer2_chain_drop(parent);
1162 	}
1163 	if (ochain)
1164 		hammer2_chain_drop(ochain);
1165 
1166 	return error;
1167 }
1168 
1169 /*
1170  * Calculate the allocation size for the file fragment straddling EOF
1171  */
1172 int
1173 hammer2_inode_calc_alloc(hammer2_key_t filesize)
1174 {
1175 	int frag = (int)filesize & HAMMER2_PBUFMASK;
1176 	int radix;
1177 
1178 	if (frag == 0)
1179 		return(0);
1180 	for (radix = HAMMER2_MINALLOCRADIX; frag > (1 << radix); ++radix)
1181 		;
1182 	return (radix);
1183 }
1184 
1185 /*
1186  * Given an exclusively locked inode we consolidate its chain for hardlink
1187  * creation, adding (nlinks) to the file's link count and potentially
1188  * relocating the inode to a directory common to ip->pip and tdip.
1189  *
1190  * Returns a locked chain in (*chainp) (the chain's lock is in addition to
1191  * any lock it might already have due to the inode being locked).  *chainp
1192  * is set unconditionally and its previous contents can be garbage.
1193  *
1194  * The caller is responsible for replacing ip->chain, not us.  For certain
1195  * operations such as renames the caller may do additional manipulation
1196  * of the chain before replacing ip->chain.
1197  */
1198 int
1199 hammer2_hardlink_consolidate(hammer2_trans_t *trans, hammer2_inode_t *ip,
1200 			     hammer2_chain_t **chainp,
1201 			     hammer2_inode_t *tdip, int nlinks)
1202 {
1203 	hammer2_inode_data_t *ipdata;
1204 	hammer2_mount_t *hmp;
1205 	hammer2_inode_t *fdip;
1206 	hammer2_inode_t *cdip;
1207 	hammer2_chain_t *chain;
1208 	hammer2_chain_t *nchain;
1209 	hammer2_chain_t *parent;
1210 	int error;
1211 
1212 	/*
1213 	 * Extra lock on chain so it can be returned locked.
1214 	 */
1215 	hmp = tdip->hmp;
1216 
1217 	chain = ip->chain;
1218 	error = hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
1219 	KKASSERT(error == 0);
1220 
1221 	if (nlinks == 0 &&			/* no hardlink needed */
1222 	    (chain->data->ipdata.name_key & HAMMER2_DIRHASH_VISIBLE)) {
1223 		*chainp = chain;
1224 		return (0);
1225 	}
1226 	if (hammer2_hardlink_enable < 0) {	/* fake hardlinks */
1227 		*chainp = chain;
1228 		return (0);
1229 	}
1230 
1231 	if (hammer2_hardlink_enable == 0) {	/* disallow hardlinks */
1232 		hammer2_chain_unlock(chain);
1233 		*chainp = NULL;
1234 		return (ENOTSUP);
1235 	}
1236 
1237 	/*
1238 	 * cdip will be returned with a ref, but not locked.
1239 	 */
1240 	fdip = ip->pip;
1241 	cdip = hammer2_inode_common_parent(fdip, tdip);
1242 
1243 	/*
1244 	 * If no change in the hardlink's target directory is required and
1245 	 * this is already a hardlink target, all we need to do is adjust
1246 	 * the link count.
1247 	 *
1248 	 * XXX The common parent is a big wiggly due to duplication from
1249 	 *     renames.  Compare the core (RBTREE) pointer instead of the
1250 	 *     ip's.
1251 	 */
1252 	if (cdip == fdip &&
1253 	    (chain->data->ipdata.name_key & HAMMER2_DIRHASH_VISIBLE) == 0) {
1254 		if (nlinks) {
1255 			hammer2_chain_modify(trans, &chain, 0);
1256 			chain->data->ipdata.nlinks += nlinks;
1257 		}
1258 		*chainp = chain;
1259 		error = 0;
1260 		goto done;
1261 	}
1262 
1263 	/*
1264 	 * We either have to move an existing hardlink target or we have
1265 	 * to create a fresh hardlink target.
1266 	 *
1267 	 * Hardlink targets are hidden inodes in a parent directory common
1268 	 * to all directory entries referencing the hardlink.
1269 	 */
1270 	nchain = hammer2_hardlink_shiftup(trans, &chain, cdip, &error);
1271 
1272 	if (error == 0) {
1273 		/*
1274 		 * Bump nlinks on duplicated hidden inode, repoint
1275 		 * ip->chain.
1276 		 */
1277 		hammer2_chain_modify(trans, &nchain, 0);
1278 		nchain->data->ipdata.nlinks += nlinks;
1279 		hammer2_inode_repoint(ip, cdip, nchain);
1280 
1281 		/*
1282 		 * If the old chain is not a hardlink target then replace
1283 		 * it with a OBJTYPE_HARDLINK pointer.
1284 		 *
1285 		 * If the old chain IS a hardlink target then delete it.
1286 		 */
1287 		if (chain->data->ipdata.name_key & HAMMER2_DIRHASH_VISIBLE) {
1288 			/*
1289 			 * Replace original non-hardlink that's been dup'd
1290 			 * with a special hardlink directory entry.  We must
1291 			 * set the DIRECTDATA flag to prevent sub-chains
1292 			 * from trying to synchronize to the inode if the
1293 			 * file is extended afterwords.
1294 			 */
1295 			hammer2_chain_modify(trans, &chain, 0);
1296 			ipdata = &chain->data->ipdata;
1297 			ipdata->target_type = ipdata->type;
1298 			ipdata->type = HAMMER2_OBJTYPE_HARDLINK;
1299 			ipdata->uflags = 0;
1300 			ipdata->rmajor = 0;
1301 			ipdata->rminor = 0;
1302 			ipdata->ctime = 0;
1303 			ipdata->mtime = 0;
1304 			ipdata->atime = 0;
1305 			ipdata->btime = 0;
1306 			bzero(&ipdata->uid, sizeof(ipdata->uid));
1307 			bzero(&ipdata->gid, sizeof(ipdata->gid));
1308 			ipdata->op_flags = HAMMER2_OPFLAG_DIRECTDATA;
1309 			ipdata->cap_flags = 0;
1310 			ipdata->mode = 0;
1311 			ipdata->size = 0;
1312 			ipdata->nlinks = 1;
1313 			ipdata->iparent = 0;	/* XXX */
1314 			ipdata->pfs_type = 0;
1315 			ipdata->pfs_inum = 0;
1316 			bzero(&ipdata->pfs_clid, sizeof(ipdata->pfs_clid));
1317 			bzero(&ipdata->pfs_fsid, sizeof(ipdata->pfs_fsid));
1318 			ipdata->data_quota = 0;
1319 			ipdata->data_count = 0;
1320 			ipdata->inode_quota = 0;
1321 			ipdata->inode_count = 0;
1322 			ipdata->attr_tid = 0;
1323 			ipdata->dirent_tid = 0;
1324 			bzero(&ipdata->u, sizeof(ipdata->u));
1325 			/* XXX transaction ids */
1326 		} else {
1327 			kprintf("DELETE INVISIBLE\n");
1328 			for (;;) {
1329 				parent = chain->parent;
1330 				hammer2_chain_ref(parent);
1331 				hammer2_chain_ref(chain);
1332 				hammer2_chain_unlock(chain);
1333 				hammer2_chain_lock(parent,
1334 						   HAMMER2_RESOLVE_ALWAYS);
1335 				hammer2_chain_lock(chain,
1336 						   HAMMER2_RESOLVE_ALWAYS);
1337 				hammer2_chain_drop(chain);
1338 				if (chain->parent == parent)
1339 					break;
1340 				hammer2_chain_unlock(parent);
1341 				hammer2_chain_drop(parent);
1342 			}
1343 			hammer2_chain_delete(trans, parent, chain);
1344 			hammer2_chain_unlock(parent);
1345 			hammer2_chain_drop(parent);
1346 		}
1347 
1348 		/*
1349 		 * Return the new chain.
1350 		 */
1351 		hammer2_chain_unlock(chain);
1352 		*chainp = nchain;
1353 	} else {
1354 		/*
1355 		 * Return an error
1356 		 */
1357 		hammer2_chain_unlock(chain);
1358 		*chainp = NULL;
1359 	}
1360 
1361 	/*
1362 	 * Cleanup, chain/nchain already dealt with.
1363 	 */
1364 done:
1365 	hammer2_inode_drop(cdip);
1366 
1367 	return (error);
1368 }
1369 
1370 /*
1371  * If (*ochainp) is non-NULL it points to the forward OBJTYPE_HARDLINK
1372  * inode while (*chainp) points to the resolved (hidden hardlink
1373  * target) inode.  In this situation when nlinks is 1 we wish to
1374  * deconsolidate the hardlink, moving it back to the directory that now
1375  * represents the only remaining link.
1376  */
1377 int
1378 hammer2_hardlink_deconsolidate(hammer2_trans_t *trans,
1379 			       hammer2_inode_t *dip,
1380 			       hammer2_chain_t **chainp,
1381 			       hammer2_chain_t **ochainp)
1382 {
1383 	if (*ochainp == NULL)
1384 		return (0);
1385 	/* XXX */
1386 	return (0);
1387 }
1388 
1389 /*
1390  * The caller presents a locked *chainp pointing to a HAMMER2_BREF_TYPE_INODE
1391  * with an obj_type of HAMMER2_OBJTYPE_HARDLINK.  This routine will gobble
1392  * the *chainp and return a new locked *chainp representing the file target
1393  * (the original *chainp will be unlocked).
1394  *
1395  * When a match is found the chain representing the original HARDLINK
1396  * will be returned in *ochainp with a ref, but not locked.
1397  *
1398  * When no match is found *chainp is set to NULL and EIO is returned.
1399  * (*ochainp) will still be set to the original chain with a ref but not
1400  * locked.
1401  */
1402 int
1403 hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_chain_t **chainp,
1404 		      hammer2_chain_t **ochainp)
1405 {
1406 	hammer2_chain_t *chain = *chainp;
1407 	hammer2_chain_t *parent;
1408 	hammer2_inode_t *ip;
1409 	hammer2_inode_t *pip;
1410 	hammer2_key_t lhc;
1411 
1412 	pip = dip;
1413 	hammer2_inode_ref(pip);		/* for loop */
1414 	hammer2_chain_ref(chain);	/* for (*ochainp) */
1415 	*ochainp = chain;
1416 
1417 	/*
1418 	 * Locate the hardlink.  pip is referenced and not locked,
1419 	 * ipp.
1420 	 *
1421 	 * chain is reused.
1422 	 */
1423 	lhc = chain->data->ipdata.inum;
1424 	hammer2_chain_unlock(chain);
1425 	chain = NULL;
1426 
1427 	while ((ip = pip) != NULL) {
1428 		hammer2_inode_lock_ex(ip);
1429 		parent = hammer2_chain_lookup_init(ip->chain, 0);
1430 		hammer2_inode_drop(ip);			/* loop */
1431 		KKASSERT(parent->bref.type == HAMMER2_BREF_TYPE_INODE);
1432 		chain = hammer2_chain_lookup(&parent, lhc, lhc, 0);
1433 		hammer2_chain_lookup_done(parent);
1434 		if (chain)
1435 			break;
1436 		pip = ip->pip;		/* safe, ip held locked */
1437 		if (pip)
1438 			hammer2_inode_ref(pip);		/* loop */
1439 		hammer2_inode_unlock_ex(ip);
1440 	}
1441 
1442 	/*
1443 	 * chain is locked, ip is locked.  Unlock ip, return the locked
1444 	 * chain.  *ipp is already set w/a ref count and not locked.
1445 	 *
1446 	 * (parent is already unlocked).
1447 	 */
1448 	if (ip)
1449 		hammer2_inode_unlock_ex(ip);
1450 	*chainp = chain;
1451 	if (chain) {
1452 		KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
1453 		/* already locked */
1454 		return (0);
1455 	} else {
1456 		return (EIO);
1457 	}
1458 }
1459 
1460 /*
1461  * Find the directory common to both fdip and tdip, hold and return
1462  * its inode.
1463  */
1464 hammer2_inode_t *
1465 hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip)
1466 {
1467 	hammer2_inode_t *scan1;
1468 	hammer2_inode_t *scan2;
1469 
1470 	/*
1471 	 * We used to have a depth field but it complicated matters too
1472 	 * much for directory renames.  So now its ugly.  Check for
1473 	 * simple cases before giving up and doing it the expensive way.
1474 	 *
1475 	 * XXX need a bottom-up topology stability lock
1476 	 */
1477 	if (fdip == tdip || fdip == tdip->pip) {
1478 		hammer2_inode_ref(fdip);
1479 		return(fdip);
1480 	}
1481 	if (fdip->pip == tdip) {
1482 		hammer2_inode_ref(tdip);
1483 		return(tdip);
1484 	}
1485 
1486 	/*
1487 	 * XXX not MPSAFE
1488 	 */
1489 	for (scan1 = fdip; scan1->pmp == fdip->pmp; scan1 = scan1->pip) {
1490 		scan2 = tdip;
1491 		while (scan2->pmp == tdip->pmp) {
1492 			if (scan1 == scan2) {
1493 				hammer2_inode_ref(scan1);
1494 				return(scan1);
1495 			}
1496 			scan2 = scan2->pip;
1497 			if (scan2 == NULL)
1498 				break;
1499 		}
1500 	}
1501 	panic("hammer2_inode_common_parent: no common parent %p %p\n",
1502 	      fdip, tdip);
1503 	/* NOT REACHED */
1504 	return(NULL);
1505 }
1506